## 1. Import Libraries

In [1]:
import os
from pathlib import Path
from typing import Tuple

import numpy as np
import pandas as pd
from scipy.stats.stats import pearsonr

In [2]:
os.chdir("/Users/weixue/Desktop/BT4013/Project/")

from data_processing.indicators import macd, rsi, atr
from systems.systems_util import get_futures_list
from utils.data_loader import load_raw_data

## 2. Define Helper Functions

In [3]:
def align_index(series_1, series_2) -> Tuple[pd.Series, pd.Series]:
    """
    Aligns the datetime index of the two input series.
    """
    union_index = series_1.index.intersection(series_2.index)
    return series_1.loc[union_index], series_2.loc[union_index]

In [4]:
def compute_acf_pvalues(series: pd.Series, ticker: str):
    """
    Takes in a time series and computes the p-values of its autocorrelations.
    Returns a series of p-values with the indexes representing the number of time lags.
    """
    lag_to_p_value_map = {}
    
    for lag in range(1, 10):
        lagged_series = series.shift(lag).dropna()
        lagged_series, original_series = align_index(lagged_series, series.copy())
        p_value = pearsonr(lagged_series, original_series)[1]
        lag_to_p_value_map[lag] = round(p_value, 3)
        
    correlation_series = pd.Series(lag_to_p_value_map)
    correlation_series.name = ticker
    
    return correlation_series

## 3. Compute ACFs of Daily Returns for Each Asset

In [5]:
futures_list = get_futures_list()

In [10]:
result = []

for ticker in futures_list:
    data = data[:"2020-12-31"]
    data = load_raw_data(ticker)
    close_price_diff = data["CLOSE"].pct_change()
    acf_pvalues = compute_acf_pvalues(series=close_price_diff, ticker=ticker)
    result.append(acf_pvalues)

acf_pvalues_table = pd.DataFrame(result)
acf_pvalues_table.index.name = "ASSETS"
acf_pvalues_table.columns.name = "TIME LAG"

acf_pvalues_table

TIME LAG,1,2,3,4,5,6,7,8,9
ASSETS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
F_AD,0.040,0.586,0.015,0.078,0.044,0.031,0.276,0.306,0.536
F_BO,0.050,0.615,0.360,0.930,0.423,0.059,0.338,0.314,0.770
F_BP,0.081,0.673,0.566,0.878,0.532,0.779,0.817,0.796,0.403
F_C,0.007,0.062,0.819,0.577,0.349,0.053,0.958,0.987,0.055
F_CC,0.203,0.213,0.407,0.933,0.629,0.611,0.834,0.942,0.658
...,...,...,...,...,...,...,...,...,...
F_VF,0.478,0.400,0.026,0.223,0.963,0.132,0.220,0.845,0.759
F_VT,0.243,0.089,0.008,0.093,0.666,0.590,0.030,0.269,0.840
F_VW,0.000,0.229,0.000,0.764,0.562,0.068,0.872,0.466,0.219
F_GD,0.000,0.559,0.914,0.551,0.964,0.236,0.014,0.672,0.637


## 4. Keep Only Assets with Statistically Significant Autocorrelation

In [15]:
# Lag-1 ACF is significant
significant_assets_lag1 = acf_pvalues_table[acf_pvalues_table[1] < 0.05].index.tolist()
print(sorted(significant_assets_lag1))
print(len(significant_assets_lag1))

['F_AD', 'F_AH', 'F_AX', 'F_BC', 'F_C', 'F_CA', 'F_DL', 'F_DX', 'F_DZ', 'F_EB', 'F_ED', 'F_ES', 'F_F', 'F_FC', 'F_FL', 'F_FM', 'F_FY', 'F_GD', 'F_GX', 'F_HG', 'F_HP', 'F_LB', 'F_LC', 'F_LQ', 'F_LR', 'F_LU', 'F_LX', 'F_MD', 'F_NG', 'F_NQ', 'F_NR', 'F_NY', 'F_O', 'F_PA', 'F_PL', 'F_RB', 'F_RF', 'F_RU', 'F_SB', 'F_SH', 'F_SS', 'F_SX', 'F_TU', 'F_UB', 'F_VW', 'F_VX', 'F_XX', 'F_YM', 'F_ZQ']
49


In [16]:
# Both lag-1 & lag-2 ACF are significant
significant_assets_lag1_lag2 = \
acf_pvalues_table[(acf_pvalues_table[1] < 0.05) & (acf_pvalues_table[2] < 0.05)].index.tolist()
print(sorted(significant_assets_lag1_lag2))
print(len(significant_assets_lag1_lag2))

['F_EB', 'F_ED', 'F_ES', 'F_F', 'F_LQ', 'F_LU', 'F_LX', 'F_NQ', 'F_O', 'F_RF', 'F_SB', 'F_SS', 'F_VX']
13
