# Stationarity Testing

This notebook tests each stock for stationary data.
1. Price data is assumed to be an I(1) series while log returns are an I(0) series. This is verified through:
    1. ACF plot
    2. PACF plot
    3. ADF test
    4. KPSS test

In [19]:
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import warnings

from statsmodels.tsa.stattools import adfuller, kpss
from statsmodels.tools.sm_exceptions import InterpolationWarning
warnings.simplefilter('ignore', InterpolationWarning)

In [2]:
# Make plot directories
for dirname in ['acf-pacf/']:
    if not os.path.isdir(dirname):
        os.makedirs(dirname)

In [20]:
# Import data
stock_names = [f"stock{i}" for i in range(50)]
df = pd.read_csv('prices.txt', engine='python', sep='   ', header=None, names=stock_names)
df.head()

Unnamed: 0,stock0,stock1,stock2,stock3,stock4,stock5,stock6,stock7,stock8,stock9,...,stock40,stock41,stock42,stock43,stock44,stock45,stock46,stock47,stock48,stock49
0,13.46,71.65,48.46,50.52,52.1,13.0,18.98,47.71,69.49,49.96,...,32.64,55.76,14.46,58.94,36.71,52.62,49.33,36.22,49.0,56.09
1,13.48,72.1,48.52,50.5,52.06,12.95,18.95,47.84,69.73,49.93,...,32.52,55.97,14.44,59.81,36.64,52.58,49.2,36.27,48.84,56.08
2,13.47,72.35,48.48,50.62,51.8,12.79,18.98,47.98,69.6,49.33,...,32.48,56.34,14.5,59.04,36.89,52.49,49.48,36.39,48.56,55.9
3,13.53,72.51,48.42,50.75,51.66,12.66,18.96,48.74,69.54,49.67,...,32.59,56.32,14.4,58.73,36.94,52.4,49.42,36.41,49.0,56.14
4,13.64,71.99,48.4,50.65,51.97,12.62,18.89,48.88,69.68,49.46,...,32.64,56.32,14.36,59.01,37.03,52.44,49.79,36.42,48.14,55.9


### Autocorrelation + Partial Autocorrelation Plots

In [None]:
def acf_pacf_plot(data, lags=20):
    fig, ax = plt.subplots(nrows=1, ncols=2, figsize=(10,5))
    sm.graphics.tsa.plot_acf(data.values.squeeze(), ax=ax[0], lags=lags)
    sm.graphics.tsa.plot_pacf(data.values.squeeze(), ax=ax[1], lags=lags)
    plt.suptitle(data.name)
    fig.savefig(f"acf-pacf/{data.name}.png")
    plt.close(fig)

# Plot ACF/PACF of all stocks
for stock in stocks:
    acf_pacf_plot(df[stock])

### Stationarity Test

In [21]:
def adf_test(series, significance=0.05) -> bool:
    test = adfuller(series, autolag='AIC')

    if significance <= 0.01:
        confidence = '1%'
    elif significance <= 0.05:
        confidence = '5%'
    else:
        confidence = '10%'

    return test[1] <= significance and test[0] <= test[4][confidence]

def kpss_test(series, significance=0.05) -> bool:
    test = kpss(series, regression='c', nlags='auto', )

    if significance <= 0.01:
        confidence = '1%'
    elif significance <= 0.025:
        confidence = '2.5%'
    elif significance <= 0.05:
        confidence = '5%'
    else:
        confidence = '10%'

    return test[1] <= significance and test[0] <= test[3][confidence]

def is_stationary(series, significance=0.05):
    adf = adf_test(series, significance)
    kpss = kpss_test(series, significance)
    
    if adf and kpss:
        return 'stationary'
    elif kpss:
        return 'trend'
    elif adf:
        return 'difference'
    else:
        return 'non-stationary'

In [22]:
prices = []
log_returns = []
lag_1s = []
lag_2s = []
lag_3s = []

for stock_name, price_data in df.items():
    log_return = np.log1p(price_data.pct_change()).dropna()
    lag_1 = price_data.diff(periods=1).dropna()
    lag_2 = price_data.diff(periods=2).dropna()
    lag_3 = price_data.diff(periods=3).dropna()

    prices.append(is_stationary(price_data))
    log_returns.append(is_stationary(log_return))
    lag_1s.append(is_stationary(lag_1))
    lag_2s.append(is_stationary(lag_2))
    lag_3s.append(is_stationary(lag_3))

pd.DataFrame({'Price': prices, 'Log Returns': log_returns, 'Lag-1': lag_1s, 'Lag-2': lag_2s, 'Lag-3': lag_3s}, index=stock_names)

Unnamed: 0,Price,Log Returns,Lag-1,Lag-2,Lag-3
stock0,non-stationary,difference,difference,difference,difference
stock1,non-stationary,difference,difference,difference,difference
stock2,non-stationary,difference,difference,difference,difference
stock3,non-stationary,difference,difference,difference,difference
stock4,non-stationary,difference,difference,difference,difference
stock5,non-stationary,difference,difference,difference,difference
stock6,non-stationary,difference,difference,difference,difference
stock7,non-stationary,difference,difference,difference,difference
stock8,difference,difference,difference,difference,difference
stock9,non-stationary,difference,difference,difference,difference


# Hurst Exponent

In [31]:
import numpy as np
import matplotlib.pyplot as plt
from hurst import compute_Hc

def hurst(stock_name, window=-100):
    # Evaluate Hurst equation
    stock_prices = df[stock_name].iloc[window:]
    H, c, data = compute_Hc(stock_prices, kind='price', simplified=True)

    # print("H={:.4f}, c={:.4f}".format(H,c))
    return H, c

def interpret(H, threshold=0.3):
    if H < threshold:
        return('anti-persistent behaviour')
    if H > (1 - threshold):
        return('persistent behaviour')
    return('Brownian Motion')

results = [hurst(stock_name, -250) for stock_name in df.columns]
hurst_df = pd.DataFrame(results, columns=['H', 'c'])
hurst_df['behaviour'] = hurst_df.apply(lambda row: interpret(row['H'], 0.4), axis=1)
hurst_df

Unnamed: 0,H,c,behaviour
0,0.846762,0.564462,persistent behaviour
1,0.571684,1.283309,Brownian Motion
2,0.657656,0.87606,persistent behaviour
3,0.659046,0.828771,persistent behaviour
4,0.495511,1.188831,Brownian Motion
5,0.821999,0.565808,persistent behaviour
6,0.713595,0.812322,persistent behaviour
7,0.542719,1.179717,Brownian Motion
8,0.497729,1.296357,Brownian Motion
9,0.458486,1.474364,Brownian Motion
