In [1]:
# Load IPython's autoreload extension.
%load_ext autoreload
# Mode 2: re-import all modules (except those excluded via %aimport) before
# each cell executes, so edits to local modules take effect without a kernel restart.
%autoreload 2

In [2]:
import datetime
import sys
import os
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from pprint import pprint
from typing import Dict

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statsmodels.api as sm
from tqdm import tqdm

sys.path.append("../src")
sys.path.append("../scripts")

from model import RegimeSwitchingModel
from data import create_train_test_data

パス，変数，データ作成

In [3]:
# --- Analysis conditions ---
# Both values are passed to create_train_test_data and are also embedded in
# the selected-factors file name below.
prediction_lag, rolling_span = 1, 250

# --- Directories ---
DATA_INPUT_DIR = Path("../data/input")    # input data directory
DATA_OUTPUT_DIR = Path("../data/output")  # output data directory

# --- Input files ---
# NOTE(review): in statsmodels terminology "endog" is the dependent variable
# and "exog" the regressors, yet endog_file points at X.csv and exog_file at
# Y.csv -- confirm against create_train_test_data that this is intended.
endog_file = DATA_INPUT_DIR.joinpath("X.csv")
exog_file = DATA_INPUT_DIR.joinpath("Y.csv")
selected_factors_file = DATA_INPUT_DIR.joinpath(
    f"Selected_Factors_Col{rolling_span}_lag{prediction_lag}.csv"
)

通常のマルコフ回帰（Markov Regression）：定数項・回帰係数・誤差分散のすべてがレジーム $S_t$ に応じて切り替わるモデル。

$$
y_t = \mu_{S_t} + X^\prime_{t-1} \beta_{S_t} + \epsilon_{S_t}
$$

In [4]:
# Seed NumPy's global RNG so that a fresh "Restart & Run All" reproduces the
# same estimates. The fit below uses search_reps=10, which presumably triggers
# a random start-parameter search (assumes RegimeSwitchingModel.fit forwards
# these arguments to statsmodels' Markov-switching fit -- TODO confirm).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

# Build the train/test split as of the base date.
base_date = datetime.datetime(2022, 12, 8)
y_train, y_test, X_train, X_test = create_train_test_data(
    base_date,
    endog_file,
    exog_file,
    selected_factors_csv_file=selected_factors_file,
    prediction_lag=prediction_lag,
    rolling_span=rolling_span,
)

# Markov regression with 2 regimes; the constant, the exog coefficients and
# the error variance are all allowed to switch between regimes.
rs = RegimeSwitchingModel(
    y_train,
    k_regimes=2,
    exog=X_train,
    trend='c',
    switching_trend=True,
    switching_exog=True,
    switching_variance=True
)

# Parameter estimation (model fitting).
rs.fit(
    maxiter=1000,
    em_iter=1000,
    cov_type="approx",
    method="powell",  # "bfgs" was the alternative optimizer tried here
    search_reps=10,
    search_iter=10,
    disp=True
)

# Forecast for the test inputs, passed as a plain NumPy array.
pred = rs.forecast(X_test.to_numpy(), method="avg")

  self._init_dates(dates, freq)


Optimization terminated successfully.
         Current function value: -2.787371
         Iterations: 1
         Function evaluations: 527


In [5]:
# Display the forecast value returned by rs.forecast(...) for the test inputs.
pred

-0.005087655566136425

In [6]:
# Render the summary tables of rs.res (presumably the fit results stored by
# rs.fit -- confirm in RegimeSwitchingModel).
rs.res.summary()

0,1,2,3
Dep. Variable:,ITRX_Japan_Return,No. Observations:,250.0
Model:,RegimeSwitchingModel,Log Likelihood,696.843
Date:,"Sat, 04 Feb 2023",AIC,-1325.685
Time:,22:50:55,BIC,-1205.956
Sample:,0,HQIC,-1277.498
,- 250,,
Covariance Type:,approx,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,-0.0015,0.001,-1.935,0.053,-0.003,1.91e-05
x1,0.0308,3.63e-05,846.702,0.000,0.031,0.031
x2,0.0395,1.73e-05,2279.951,0.000,0.039,0.040
x3,0.0006,0.001,0.765,0.444,-0.001,0.002
x4,0.0772,2.17e-06,3.57e+04,0.000,0.077,0.077
x5,0.0017,0.001,1.632,0.103,-0.000,0.004
x6,0.0063,3.45e-05,184.279,0.000,0.006,0.006
x7,5.291e-11,,,,,
x8,0.0048,0.000,10.703,0.000,0.004,0.006

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
const,0.0055,4.48e-05,122.360,0.000,0.005,0.006
x1,-0.2533,9.25e-06,-2.74e+04,0.000,-0.253,-0.253
x2,-0.3586,2.3e-06,-1.56e+05,0.000,-0.359,-0.359
x3,-0.0054,0.000,-12.639,0.000,-0.006,-0.005
x4,0.9017,3.47e-06,2.6e+05,0.000,0.902,0.902
x5,-0.0054,0.000,-49.511,0.000,-0.006,-0.005
x6,0.0201,3.95e-06,5082.805,0.000,0.020,0.020
x7,3.95e-10,1.79e-10,2.208,0.027,4.43e-11,7.46e-10
x8,0.0031,0.001,4.114,0.000,0.002,0.005

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
p[0->0],0.8823,4.14e-06,2.13e+05,0.000,0.882,0.882
p[1->0],0.2998,1.58e-06,1.9e+05,0.000,0.300,0.300
