In [None]:
import numpy as np
import pandas as pd

import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint
# Seed NumPy's global random number generator so the np.random draws in the
# cells below are reproducible when the notebook is run top to bottom.
np.random.seed(107)

import matplotlib.pyplot as plt

In [None]:
# Simulate a random walk: cumulative sum of 100 i.i.d. N(0, 1) steps,
# shifted by a constant offset of 50.
X_returns = np.random.normal(loc=0, scale=1, size=100)
X = pd.Series(X_returns.cumsum(), name='X').add(50)
X.plot();

In [None]:
# Build Y as X shifted up by 5 plus fresh N(0, 1) noise, then plot both
# series side by side.
some_noise = np.random.normal(loc=0, scale=1, size=100)
Y = (X + 5 + some_noise).rename('Y')
pd.concat((X, Y), axis='columns').plot();

In [None]:
# Plot the price spread Y - X with a dashed red line at its sample mean.
price_spread = Y - X
price_spread.plot()
plt.axhline(price_spread.mean(), color='red', linestyle='--')
plt.xlabel('Time')
plt.legend(['Price Spread', 'Mean']);

In [None]:
# Run the cointegration test on X and the raw noise series.
# The null hypothesis of coint() is that the series are NOT cointegrated, so
# a sufficiently low p-value lets us reject the null in favour of cointegration.
# NOTE(review): this tests X against some_noise rather than Y — confirm that is intended.
coint(X, some_noise)

In [None]:
# Compute the p-value of the cointegration test between X and Y.
# coint() tests the null hypothesis of NO cointegration, so a small p-value
# is evidence that the spread between the two time series is stationary
# around its mean.
score, pvalue, _ = coint(X, Y)
# 0.05 is the significance level used for the verdict.
if pvalue < 0.05:
    print('Likely cointegrated.')
else:
    print('Likely not cointegrated.')

In [None]:
# Correlation between X and Y (Pearson, the pandas default method);
# the value is shown as the cell output.
X.corr(Y)

In [None]:
# Correlation without cointegration:
# two random walks whose steps have different positive means (1 vs. 2), so
# both trend upward while the gap between them keeps growing.
X_returns = np.random.normal(1, 1, 100)
Y_returns = np.random.normal(2, 1, 100)

X_diverging = pd.Series(np.cumsum(X_returns), name='X')
# The second series is named 'Y' (it was mislabelled 'X'), so the combined
# frame has distinct column names and the plot legend tells the lines apart.
Y_diverging = pd.Series(np.cumsum(Y_returns), name='Y')

pd.concat([X_diverging, Y_diverging], axis=1).plot();

In [None]:
# Report the correlation of the diverging pair, then the p-value of the
# cointegration test on the same two series.
diverging_corr = X_diverging.corr(Y_diverging)
print(f'Correlation: {diverging_corr}')
score, pvalue, _ = coint(X_diverging, Y_diverging)
print(f'Cointegration test p-value: {pvalue}')

In [None]:
# Cointegration without correlation:
# Y2 is 1000 draws of N(0, 1) noise shifted up by 20; Y3 starts out as a
# separate (deep) copy of Y2.
Y2 = pd.Series(np.random.normal(loc=0, scale=1, size=1000), name='Y2').add(20)
Y3 = Y2.copy(deep=True)

In [None]:
# Overwrite Y3 with a square wave: consecutive 100-sample segments alternate
# between 30 and 10, starting at 30, across the first 1000 positions.
for segment, start in enumerate(range(0, 1000, 100)):
    Y3.iloc[start:start + 100] = 30 if segment % 2 == 0 else 10

In [None]:
# Draw Y2 and Y3 on the same axes and pin the y-range to [0, 40].
ax = Y2.plot()
Y3.plot(ax=ax)
ax.set_ylim([0, 40]);

In [None]:
# The correlation between the noise and the square wave is nearly zero;
# print it next to the cointegration test p-value.
noise_wave_corr = Y2.corr(Y3)
print(f'Correlation: {noise_wave_corr}')
score, pvalue, _ = coint(Y2, Y3)
print(f'Cointegration test p-value: {pvalue}')

In [None]:
# Read tech.csv and keep only its 'Symbol' column as a plain Python list.
tech = pd.read_csv('tech.csv')['Symbol'].to_list()

In [None]:
# Mark entry and exit signals on the price chart, driven by the z-score of
# the price ratio. The z-score is computed once and reused; previously
# zscore(ratio) was re-evaluated for every mask and every plot call.
ratio_z = zscore(ratio)

# Entry when the z-score is beyond +/-2; exit when it is within 1/20 of zero.
# The former split into (2, 3) and (3, inf) bands selected the same value in
# both branches and silently dropped points with z exactly equal to +/-3;
# likewise the exit mask excluded z exactly equal to 0.
high_z = ratio_z > 2
low_z = ratio_z < -2
near_zero = np.abs(ratio_z) < 1/20

# NOTE(review): variable names are kept unchanged, but each array holds the
# prices of the series passed to np.where (e.g. long_S1 holds S2 prices) —
# confirm the intended leg naming.
long_S1 = np.where(high_z, S2, np.nan)
long_S2 = np.where(low_z, S1, np.nan)
short_S2 = np.where(high_z, S1, np.nan)
short_S1 = np.where(low_z, S2, np.nan)
exit_S1 = np.where(near_zero, S1, np.nan)
exit_S2 = np.where(near_zero, S2, np.nan)

plt.figure(figsize=(15, 9))
S1.plot()
S2.plot()
# NaN entries are not drawn, so only the signal points show up as markers.
plt.scatter(ratio_z.index, short_S2, color='purple', marker='^')
plt.scatter(ratio_z.index, short_S1, color='purple', marker='^')
plt.scatter(ratio_z.index, long_S1, color='green', marker='^')
plt.scatter(ratio_z.index, long_S2, color='green', marker='^')
plt.scatter(ratio_z.index, exit_S1, color='red', marker='v')
plt.scatter(ratio_z.index, exit_S2, color='red', marker='v')

# Overlay the z-score itself with reference lines at 0, +/-2 and +/-3.
plt.plot(ratio_z.index, ratio_z)
plt.axhline(0, color='black')
plt.axhline(2, color='red', linestyle='--', alpha=1)
plt.axhline(-2, color='green', linestyle='--', alpha=1)
plt.axhline(3, color='red', alpha=1)
plt.axhline(-3, color='green', alpha=1)

In [None]:
# Plot z, z_mean and z_std in turn; all three are defined outside this cell.
# NOTE(review): presumably a z-score series with its mean and standard
# deviation — confirm against the cell that defines them.
z.plot()
z_mean.plot()
z_std.plot()

In [None]:
# Plot S1 together with S2 scaled by hedgeRatio, then mark the trade signals.
# hedgeRatio and the longs_*/shorts_* arrays are defined outside this cell.
# NOTE(review): presumably the signal arrays hold prices where a signal fires
# and NaN elsewhere — confirm against the cell that builds them.
S1.plot()
(hedgeRatio * S2).plot()
# Green '^' markers for the longs_* arrays, red 'v' markers for the shorts_* arrays.
plt.scatter(S1.index, longs_S1, color='green', marker='^')
plt.scatter(S1.index, shorts_S1, color='red', marker='v')
# The S2-leg signals are scaled by hedgeRatio, matching the scaled S2 line above.
plt.scatter(S2.index, hedgeRatio * longs_S2, color='green', marker='^')
plt.scatter(S2.index, hedgeRatio * shorts_S2, color='red', marker='v')

In [None]:
# Print the correlation between S1 and S2, followed by the p-value of their
# cointegration test.
pair_corr = S1.corr(S2)
print(f'Correlation: {pair_corr}')
score, pvalue, _ = coint(S1, S2)
print(f'Cointegration test p-value: {pvalue}')

In [None]:
# Regress S2 on S1 and use the fitted 'ASTI' coefficient as the hedge ratio.
# sm.OLS uses S1 exactly as given; it does not add an intercept on its own.
results = sm.OLS(S2, S1).fit()
# Narrow S1 to its 'ASTI' column only while it is still a DataFrame, so that
# re-running this cell does not fail by indexing the already-extracted Series.
if isinstance(S1, pd.DataFrame):
    S1 = S1['ASTI']
b = results.params['ASTI']

# Spread of S2 over the hedge-ratio-scaled S1, with a line at its mean.
spread = S2 - b * S1
spread.plot()
plt.axhline(spread.mean(), color='black')
plt.legend(['Spread']);

In [None]:
# Price ratio S1 / S2 with a horizontal black reference line at its mean.
ratio = S1 / S2
ax = ratio.plot()
ax.axhline(ratio.mean(), color='black')
ax.legend(['Price Ratio']);