## 1. Import Libraries

In [10]:
import os
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd
from scipy.stats.stats import pearsonr

In [11]:
# Switch to the project root before the project-local imports that follow
# (presumably so those packages resolve relative to it - TODO confirm).
# Set BT4013_PROJECT_DIR to run on another machine; the fallback is the
# original author's local path.
os.chdir(os.environ.get("BT4013_PROJECT_DIR", "/Users/weixue/Desktop/BT4013/Project/"))

from data_processing.indicators import macd, rsi, atr
from systems.systems_util import get_futures_list
from utils.data_loader import load_raw_data

## 2. Define Helper Functions

In [12]:
def align_index(series_1, series_2) -> Tuple[pd.Series, pd.Series]:
    """
    Restricts both series to the index labels they have in common.

    Returns the pair (series_1, series_2), each selected on the intersection
    of the two indexes, so the results share the same labels in the same order.
    """
    shared_labels = series_1.index.intersection(series_2.index)
    aligned_1 = series_1.loc[shared_labels]
    aligned_2 = series_2.loc[shared_labels]
    return aligned_1, aligned_2

In [13]:
def compute_acf_pvalues(series: pd.Series, ticker: str, max_lag: int = 9) -> pd.Series:
    """
    Computes the p-value of the autocorrelation of a time series at each time lag.

    For every lag from 1 to `max_lag`, the series is paired with itself shifted
    by that many rows, and the p-value of the Pearson correlation between the
    two (the second element returned by `pearsonr`) is recorded, rounded to
    3 decimal places.

    Parameters
    ----------
    series : pd.Series
        The time series to analyse. Missing values are allowed: a pair of
        observations is dropped whenever either side of it is NaN.
    ticker : str
        Used as the name of the returned series.
    max_lag : int, default 9
        Largest lag to evaluate. The default evaluates lags 1 through 9.

    Returns
    -------
    pd.Series
        P-values indexed by lag (1..max_lag) and named `ticker`. A lag with
        fewer than two complete pairs gets NaN, since no correlation can be
        computed for it.

    Raises
    ------
    ValueError
        If `max_lag` is smaller than 1.
    """
    if max_lag < 1:
        raise ValueError("max_lag must be at least 1")

    lag_to_p_value_map = {}

    for lag in range(1, max_lag + 1):
        # Put the shifted and unshifted values side by side and keep only rows
        # where BOTH are present; filtering just the shifted side would let a
        # NaN in the original series turn the whole p-value into NaN.
        paired = pd.DataFrame({"lagged": series.shift(lag), "original": series}).dropna()

        # pearsonr needs at least two observations; report "undefined" instead
        # of failing when the series is too short for this lag.
        if len(paired) < 2:
            lag_to_p_value_map[lag] = float("nan")
            continue

        p_value = pearsonr(paired["lagged"], paired["original"])[1]
        # NOTE: values are rounded before being returned, so a p-value within
        # 0.0005 of a cut-off can land exactly on it when thresholded later.
        lag_to_p_value_map[lag] = round(p_value, 3)

    p_value_series = pd.Series(lag_to_p_value_map)
    p_value_series.name = ticker

    return p_value_series

## 3. Compute ACF p-values of Daily Returns for Each Asset

In [14]:
# Collection of tickers to analyse (presumably every futures contract in the
# project universe - confirm against get_futures_list); iterated over below.
futures_list = get_futures_list()

In [7]:
# One row of autocorrelation p-values per asset, computed on the
# day-over-day percentage change of the "CLOSE" column.
result = []

for ticker in futures_list:
    daily_returns = load_raw_data(ticker)["CLOSE"].pct_change()
    result.append(compute_acf_pvalues(series=daily_returns, ticker=ticker))

# Rows are labelled by ticker (the name of each series), columns by lag.
acf_pvalues_table = pd.DataFrame(result).rename_axis(index="ASSETS", columns="TIME LAG")

acf_pvalues_table

TIME LAG,1,2,3,4,5,6,7,8,9
ASSETS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F_AD,0.040,0.586,0.015,0.078,0.044,0.031,0.276,0.306,0.536
F_BO,0.050,0.615,0.360,0.930,0.423,0.059,0.338,0.314,0.770
F_BP,0.081,0.673,0.566,0.878,0.532,0.779,0.817,0.796,0.403
F_C,0.007,0.062,0.819,0.577,0.349,0.053,0.958,0.987,0.055
F_CC,0.203,0.213,0.407,0.933,0.629,0.611,0.834,0.942,0.658
...,...,...,...,...,...,...,...,...,...
F_VF,0.478,0.400,0.026,0.223,0.963,0.132,0.220,0.845,0.759
F_VT,0.243,0.089,0.008,0.093,0.666,0.590,0.030,0.269,0.840
F_VW,0.000,0.229,0.000,0.764,0.562,0.068,0.872,0.466,0.219
F_GD,0.000,0.559,0.914,0.551,0.964,0.236,0.014,0.672,0.637


## 4. Keep Only Assets with Statistically Significant Autocorrelation

In [18]:
# Assets whose lag-1 autocorrelation p-value is below 0.05
lag_1_is_significant = acf_pvalues_table[1] < 0.05
significant_assets = acf_pvalues_table.loc[lag_1_is_significant].index.tolist()
print(significant_assets)
print(len(significant_assets))

['F_AD', 'F_C', 'F_DX', 'F_ED', 'F_ES', 'F_FC', 'F_HG', 'F_LB', 'F_LC', 'F_MD', 'F_NG', 'F_NQ', 'F_NR', 'F_O', 'F_PA', 'F_PL', 'F_RB', 'F_RU', 'F_SB', 'F_TU', 'F_XX', 'F_YM', 'F_AX', 'F_CA', 'F_UB', 'F_LX', 'F_SS', 'F_DL', 'F_ZQ', 'F_VX', 'F_BC', 'F_LU', 'F_AH', 'F_DZ', 'F_FL', 'F_FM', 'F_FY', 'F_GX', 'F_HP', 'F_LR', 'F_LQ', 'F_NY', 'F_RF', 'F_SH', 'F_SX', 'F_EB', 'F_VW', 'F_GD', 'F_F']
49


In [19]:
# Assets whose autocorrelation p-value is below 0.05 at both lag 1 and lag 2
both_lags_significant = (acf_pvalues_table[[1, 2]] < 0.05).all(axis=1)
significant_assets = acf_pvalues_table.loc[both_lags_significant].index.tolist()
print(significant_assets)
print(len(significant_assets))

['F_ED', 'F_ES', 'F_NQ', 'F_O', 'F_SB', 'F_LX', 'F_SS', 'F_VX', 'F_LU', 'F_LQ', 'F_RF', 'F_EB', 'F_F']
13


In [24]:
# Show the full p-value rows for the selected assets.
# NOTE(review): `significant_assets` is assigned by more than one cell above,
# so the rows shown here depend on which of those cells ran last.
acf_pvalues_table.loc[significant_assets]

TIME LAG,1,2,3,4,5,6,7,8,9
ASSETS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F_ED,0.0,0.0,0.001,0.645,0.0,0.001,0.082,0.526,0.194
F_ES,0.0,0.004,0.635,0.041,0.063,0.012,0.214,0.076,0.2
F_NQ,0.0,0.0,0.805,0.933,0.145,0.042,0.006,0.005,0.718
F_O,0.0,0.031,0.962,0.079,0.001,0.021,0.123,0.323,0.877
F_SB,0.042,0.023,0.012,0.116,0.307,0.199,0.367,0.776,0.014
F_LX,0.002,0.003,0.001,0.018,0.072,0.0,0.025,0.082,0.241
F_SS,0.0,0.0,0.445,0.122,0.018,0.038,0.661,0.088,0.622
F_VX,0.0,0.025,0.814,0.043,0.309,0.038,0.623,0.507,0.44
F_LU,0.0,0.027,0.181,0.246,0.075,0.002,0.487,0.332,0.63
F_LQ,0.0,0.019,0.526,0.752,0.672,0.068,0.686,0.064,0.059


In [39]:
# Per asset, count how many of lags 1-3 have a p-value below 0.05,
# then show only the assets where at least two of those three qualify.
significant_count_first_3_lags = (acf_pvalues_table[[1, 2, 3]] < 0.05).sum(axis=1)
significant_count_first_3_lags[significant_count_first_3_lags >= 2]

ASSETS
F_AD    2
F_CL    2
F_ED    3
F_ES    2
F_FC    2
F_KC    2
F_NQ    2
F_O     2
F_PA    2
F_PL    2
F_SB    3
F_YM    2
F_CA    2
F_UB    2
F_LX    3
F_SS    2
F_VX    2
F_LU    2
F_LQ    2
F_RF    2
F_SX    2
F_EB    2
F_VW    2
F_F     3
dtype: int64

In [28]:
# Per asset, count how many lags in the table have a p-value below 0.05.
# The count does not depend on any loop index, so it is computed and printed
# once rather than repeated inside a loop.
significant_count_all_lags = (acf_pvalues_table < 0.05).sum(axis=1)
print(significant_count_all_lags)

SyntaxError: unexpected EOF while parsing (<ipython-input-28-6df1f476ba84>, line 1)