In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
sys.path.append(os.getcwd() + '/modules')
import seaborn as sns

from modules.directional_change import get_data, get_DC_data, get_DC_data_v2, get_TMV, get_T, get_R
from modules.hidden_markov_model import fit_hmm, standardize_regime_labels
from modules.cross_validation import CustomCrossValidation, Pipeline

plt.style.use('seaborn')
sns.set_theme()

In [3]:
"""Hyperparameters"""

TYPE = 'equity' # 'equity' or 'fx' or 'bond'

TICKER_MAPPER = {'equity':['^DJI','^GSPC','^IXIC'],
                 'bond':['^TNX', '^IRX'],
                 'fx':['RUB=X','GBP=X','JPY=X']}

TICKERS = TICKER_MAPPER[TYPE]
START_DATE = "2005-01-01"
THETA = 0.005
NUM_REGIMES = 2

TRADING_DAY = {'equity':6.5, 'fx':12,'bond':9} # number of hours in a day we offset
TEST_TICKER = TICKERS[-1] # if we need to debug for a single ticker

In [4]:
df_ts = get_data(TICKERS, START_DATE, TRADING_DAY[TYPE]/2)
df_ret = df_ts.pct_change().dropna()

dcc_data = {}
for ticker in TICKERS:
    dcc_data[ticker] = get_DC_data_v2(df_ts[ticker], THETA)

[*********************100%***********************]  3 of 3 completed


In [5]:
df_ts

Unnamed: 0_level_0,^DJI,^GSPC,^IXIC
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2005-01-03 00:00:00,10729.429688,1202.079956,2152.149902
2005-01-03 03:15:00,10783.750000,1211.920044,2184.750000
2005-01-04 00:00:00,10630.780273,1188.050049,2107.860107
2005-01-04 03:15:00,10727.809570,1202.079956,2158.310059
2005-01-05 00:00:00,10597.830078,1183.739990,2091.239990
...,...,...,...
2022-12-28 03:15:00,33264.761719,3829.560059,10339.200195
2022-12-29 00:00:00,33220.800781,3849.280029,10478.089844
2022-12-29 03:15:00,33021.429688,3805.449951,10321.459961
2022-12-30 00:00:00,33147.250000,3839.500000,10466.480469


In [6]:
dcc_data['^DJI']

[(Timestamp('2005-01-03 03:15:00'),
  10783.75,
  Timestamp('2005-01-03 00:00:00'),
  10729.4296875),
 (Timestamp('2005-01-04 00:00:00'),
  10630.7802734375,
  Timestamp('2005-01-03 03:15:00'),
  10783.75),
 (Timestamp('2005-01-04 03:15:00'),
  10727.8095703125,
  Timestamp('2005-01-04 00:00:00'),
  10630.7802734375),
 (Timestamp('2005-01-05 00:00:00'),
  10597.830078125,
  Timestamp('2005-01-04 03:15:00'),
  10727.8095703125),
 (Timestamp('2005-01-11 03:15:00'),
  10619.76953125,
  Timestamp('2005-01-11 00:00:00'),
  10556.2197265625),
 (Timestamp('2005-01-12 03:15:00'),
  10561.3203125,
  Timestamp('2005-01-11 03:15:00'),
  10619.76953125),
 (Timestamp('2005-01-13 03:15:00'),
  10617.41015625,
  Timestamp('2005-01-13 00:00:00'),
  10505.830078125),
 (Timestamp('2005-01-14 00:00:00'),
  10558.0,
  Timestamp('2005-01-13 03:15:00'),
  10617.41015625),
 (Timestamp('2005-01-18 00:00:00'),
  10628.7900390625,
  Timestamp('2005-01-14 03:15:00'),
  10506.7099609375),
 (Timestamp('2005-01-18 

In [7]:
_ticker = '^DJI'
r = get_T(dcc_data[ticker])
regimes, model = fit_hmm(NUM_REGIMES, df_ts[ticker], r, ticker, plot = False)

In [8]:
regimes

2005-01-03 03:15:00    1
2005-01-04 00:00:00    1
2005-01-04 03:15:00    1
2005-01-05 00:00:00    1
2005-01-05 03:15:00    1
                      ..
2022-12-27 03:15:00    1
2022-12-28 00:00:00    1
2022-12-29 00:00:00    1
2022-12-29 03:15:00    1
2022-12-30 00:00:00    1
Length: 4491, dtype: int64

In [9]:
regimes.index[161]

Timestamp('2005-08-08 00:00:00')

In [10]:
regimes[161]

0

In [11]:
(regimes.index[97] - regimes.index[0]).total_seconds()

11577600.0

In [12]:
(regimes.index[153] - regimes.index[99]).total_seconds() + (regimes.index[97] - regimes.index[0]).total_seconds() + (regimes.index[161] - regimes.index[154]).total_seconds() + (regimes.index[243] - regimes.index[163]).total_seconds()

28080000.0

In [13]:
regimes.index[1].to_pydatetime().total_seconds() + regimes.index[0].to_pydatetime().total_seconds()  + regimes.index[2].to_pydatetime().total_seconds()

AttributeError: 'datetime.datetime' object has no attribute 'total_seconds'

In [None]:
# (regimes.index[1].to_pydatetime() - regimes.index[0].to_pydatetime()).total_seconds()
(regimes.index[1] - regimes.index[0]).total_seconds()

In [None]:
int (not regimes[0])

In [None]:
regimes_copy = regimes.copy()
regimes_copy[:]

In [None]:
regimes_copy = standardize_regime_labels(regimes_copy)

In [None]:
regimes_copy

In [None]:
start = regimes.index[0]
end = regimes.index[0]
initial_regime = regimes[0]
total_duration_in_initial_regime = 0
in_second_regime = False
for time, regime in regimes[1:250].items():
    if regime != initial_regime:
        if not in_second_regime:
            total_duration_in_initial_regime += (time - start).total_seconds()
            in_second_regime = True
    else:
        if in_second_regime:
            start = time
            in_second_regime = False

total_duration = (regimes.index[-1] - regimes.index[0]).total_seconds()

if initial_regime == 0 and (total_duration_in_initial_regime / total_duration) <= 0.5:
    regimes = 1 - regimes

In [None]:
dcc_data

In [None]:
def regime_detection_pipeline(x_train, y_train=None, dc_metric='R', ticker='', theta=0.1):
    input_data = None
    if dc_metric == 'R':
        tmv = get_TMV(x_train[ticker],theta)
        t = get_T(x_train[ticker])
        input_data = get_R(tmv,t[ticker],theta)
    elif dc_metric == 'TMV':
        input_data = get_TMV(x_train[ticker],theta)
    else:
        input_data = get_T(x_train[ticker])

    regimes, model = fit_hmm(NUM_REGIMES, x_train[ticker], input_data, ticker, plot = False)
    NBC()

In [None]:
regimes, models = fit_hmm(NUM_REGIMES, x_train[ticker], input_data, ticker, plot = False)

In [None]:
data = get_data(['^GSPC'], '2010-01-01', 6.5)
theta = 0.10
DC = get_DC_data_v2( data, theta)

TMV = get_TMV(DC, theta)
T = get_T( DC )
R = get_R( TMV, T, theta )

regimes, model = fit_hmm(2, data, R, 'S&P500', plot = True)
r2 = standardize_regime_labels(regimes, True)

In [None]:
regimes

In [None]:
r2

In [15]:
parameter_grid = {
    'theta': [0.25, 0.0025]
}

In [21]:

trading_day = {'equity':6.5, 'fx':12,'bond':9}
df_ts = get_data('^GSPC', start_date='2005-01-01', delta=trading_day['equity'])
cv = CustomCrossValidation(pipeline_class=Pipeline, parameter_grid=parameter_grid, verbose=True)
cv.fit(data = df_ts, metric='sharpe', minimize=False)

[*********************100%***********************]  1 of 1 completed
Iteration: 0: Parameters: {'theta': 0.25}


TypeError: Cannot divide float64 data by TimedeltaArray

In [None]:

np.mean(nb.R['train'][nb.regimes==1]), np.var(nb.R['train'][nb.regimes==1])
np.mean(nb.R['train'][nb.regimes==1]), np.var(nb.R['train'][nb.regimes==1])
