[Johansen Cointegration Test: Learn How to Implement it in Python](https://blog.quantinsti.com/johansen-test-cointegration-building-stationary-portfolio/)

In [9]:
import numpy as np
import pandas as pd
import random
from itertools import combinations
from statsmodels.tsa.vector_ar.vecm import coint_johansen

In [14]:
# Import price data
df = pd.read_csv('prices.txt', engine='python', sep='   ', header=None, names=[f"stock{i}" for i in range(50)])

# Johansen test takes at most 12 variables
rand_stocks = random.sample(df.columns.to_list(), 12)
df = df[rand_stocks]
df

Unnamed: 0,stock22,stock6,stock48,stock41,stock37,stock8,stock31,stock14,stock29,stock42,stock32,stock19
0,64.79,18.98,49.00,55.76,35.54,69.49,62.29,16.47,32.06,14.46,57.81,34.18
1,64.90,18.95,48.84,55.97,35.62,69.73,62.11,16.54,32.09,14.44,57.85,34.38
2,65.55,18.98,48.56,56.34,35.98,69.60,62.37,16.65,32.18,14.50,57.89,34.45
3,65.23,18.96,49.00,56.32,35.91,69.54,62.39,16.61,32.21,14.40,58.38,34.66
4,65.76,18.89,48.14,56.32,36.24,69.68,62.27,16.51,32.20,14.36,58.68,34.60
...,...,...,...,...,...,...,...,...,...,...,...,...
495,67.29,18.05,33.83,75.82,36.51,69.00,69.74,13.23,29.45,10.63,50.30,27.91
496,65.69,17.99,33.42,76.46,36.76,69.04,69.27,13.15,29.49,10.65,50.73,28.15
497,65.86,18.02,33.75,75.16,36.66,68.95,69.47,13.22,29.45,10.75,50.54,28.08
498,65.64,18.03,33.53,76.09,36.63,69.03,68.85,13.38,29.42,10.68,50.51,28.25


### Testing if any cointegrating relationships exist

In [15]:
# Perform the Johansen Cointegration Test with a specified number of zeros
specified_number = 0    # Testing for zero-cointegrating relationships
coint_test_result = coint_johansen(df, specified_number, 1)

# Extract the trace statistics and eigen statistics
trace_stats = coint_test_result.lr1
eigen_stats = coint_test_result.lr2

# Print test results
print("Johansen Cointegration Test Results (Testing for Zero Cointegrating Relationships):")
print(f"Trace Statistics:")
for i, interval in enumerate(['90%', '95%', '99%']):
    print(f"\tConfidence Level {interval}: {coint_test_result.lr1[i]}")
print(f"Critical Values:")
for i, interval in enumerate(['90%', '95%', '99%']):
    print(f"\tConfidence Level {interval}: {coint_test_result.cvt[i]}")

Johansen Cointegration Test Results (Testing for Zero Cointegrating Relationships):
Trace Statistics:
	Confidence Level 90%: 313.2318119869827
	Confidence Level 95%: 233.35604768924546
	Confidence Level 99%: 185.1649693821669
Critical Values:
	Confidence Level 90%: [326.5354 334.9795 351.215 ]
	Confidence Level 95%: [277.374  285.1402 300.2821]
	Confidence Level 99%: [232.103  239.2468 253.2526]


In [16]:
# Separate output sections
print('\n' + ('-' * 50) + '\n')

# Print cointegration for stock pairs
for i, (s1, s2) in enumerate(combinations(rand_stocks, 2)):
    trace_statistic = trace_stats[i]
    eigen_statistic = eigen_stats[i]
    print(f"Pair {i + 1} ({s1} and {s2}):")
    print(f"Trace Statistic: {trace_statistic}")
    print(f"Eigen Statistic: {eigen_statistic}")
    print("\n" + "-" * 50 + "\n")

# Determine cointegration based on critical values or other criteria
# Add your cointegration assessment logic here
print("Cointegration Assessment: Testing for Zero Cointegrating Relationships (Null Hypothesis)\n")


--------------------------------------------------

Pair 1 (stock22 and stock6):
Trace Statistic: 313.2318119869827
Eigen Statistic: 79.87576429773725

--------------------------------------------------

Pair 2 (stock22 and stock48):
Trace Statistic: 233.35604768924546
Eigen Statistic: 48.19107830707857

--------------------------------------------------

Pair 3 (stock22 and stock41):
Trace Statistic: 185.1649693821669
Eigen Statistic: 38.9607513244504

--------------------------------------------------

Pair 4 (stock22 and stock37):
Trace Statistic: 146.20421805771647
Eigen Statistic: 34.07468092809528

--------------------------------------------------

Pair 5 (stock22 and stock8):
Trace Statistic: 112.12953712962121
Eigen Statistic: 26.62665522403478

--------------------------------------------------

Pair 6 (stock22 and stock31):
Trace Statistic: 85.50288190558643
Eigen Statistic: 21.90272981082456

--------------------------------------------------

Pair 7 (stock22 and stock14):

IndexError: index 12 is out of bounds for axis 0 with size 12

### Testing the exact number of cointegrating relationships

In [17]:
# Perform the Johansen Cointegration Test for all pairs
coint_test_result = coint_johansen(df, det_order=0, k_ar_diff=1)

# Extract the eigenvalues and critical values
tracevalues = coint_test_result.lr1
critical_values = coint_test_result.cvt

# Interpret the results for each pair
for i, (stock1, stock2) in enumerate(combinations(rand_stocks, 2)):
    if (tracevalues[i] > critical_values[:, 1]).all():
        print(f"Pair {i + 1} ({stock1} and {stock2}) is cointegrated.")
    else:
        print(f"Pair {i + 1} ({stock1} and {stock2}) is not cointegrated.")

Pair 1 (stock22 and stock6) is not cointegrated.
Pair 2 (stock22 and stock48) is not cointegrated.
Pair 3 (stock22 and stock41) is not cointegrated.
Pair 4 (stock22 and stock37) is not cointegrated.
Pair 5 (stock22 and stock8) is not cointegrated.
Pair 6 (stock22 and stock31) is not cointegrated.
Pair 7 (stock22 and stock14) is not cointegrated.
Pair 8 (stock22 and stock29) is not cointegrated.
Pair 9 (stock22 and stock42) is not cointegrated.
Pair 10 (stock22 and stock32) is not cointegrated.
Pair 11 (stock22 and stock19) is not cointegrated.
Pair 12 (stock6 and stock48) is not cointegrated.


IndexError: index 12 is out of bounds for axis 0 with size 12