## 状態空間モデルで時系列予測を行う（cmdstanpy）

In [1]:
# Load the lab_black IPython extension (presumably to auto-format code cells with Black).
%load_ext lab_black

In [3]:
from cmdstanpy import CmdStanModel

# Smoke test of the cmdstanpy toolchain: compile the eight-schools program,
# then sample from it using the JSON data file.
data = "8schools.data.json"

# --- compile -----------------------------------------------------------
print("Compile started")
model = CmdStanModel(stan_file="8schools.stan")
print("Compile finished")
print(model)
print("========================================")

# --- sample ------------------------------------------------------------
# NOTE: the recorded run of this cell ended with a ValueError
# ("csv file header mismatch") raised after all four chains finished.
print("Sampling started")
fit = model.sample(data=data)
print("Sampling finished")
print(fit)

INFO:cmdstanpy:compiling stan program, exe file: /work/notebooks/cmdstanpy/8schools
INFO:cmdstanpy:compiler options: stanc_options=None, cpp_options=None


Compile started


INFO:cmdstanpy:compiled model file: /work/notebooks/cmdstanpy/8schools
INFO:cmdstanpy:start chain 1


Compile finished
CmdStanModel: name=8schools
	 stan_file=/work/notebooks/cmdstanpy/8schools.stan
	 exe_file=/work/notebooks/cmdstanpy/8schools
	 compiler_optons=stanc_options=None, cpp_options=None
Sampling started


INFO:cmdstanpy:finish chain 1
INFO:cmdstanpy:start chain 2
INFO:cmdstanpy:finish chain 2
INFO:cmdstanpy:start chain 3
INFO:cmdstanpy:finish chain 3
INFO:cmdstanpy:start chain 4
INFO:cmdstanpy:finish chain 4


ValueError: csv file header mismatch, file /tmp/tmp35agx1de/8schools-202107250521-4-7lkbp9f9.csv, key start_datetime is 2021-07-25 05:21:53 UTC, expected 2021-07-25 05:21:54 UTC

In [7]:
# Import the libraries used throughout the notebook.
import numpy as np
import pandas as pd
# NOTE(review): the star import pulls every public numpy.random name into the
# notebook namespace; none of them appear to be used in the cells shown here.
from numpy.random import *
from scipy import stats
from cmdstanpy import cmdstan_path, CmdStanModel
import matplotlib.pyplot as plt
import seaborn as sns

# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Make figures landscape: 10 wide by 6 tall.
from matplotlib import rcParams

rcParams["figure.figsize"] = 10, 6
# Apply seaborn's default theme to subsequent plots.
sns.set()

import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation and numerical warnings that may matter.
warnings.filterwarnings("ignore")

In [3]:
# Load the AirPassengers monthly series.
# Source: https://stat.ethz.ch/R-manual/R-devel/library/datasets/html/AirPassengers.html
#
# "#Passengers" is renamed to "Passengers" so the column has a plain name.
# Month is left as a string column and the default integer index is kept
# (no datetime index, no float cast): the final plotting cell overlays series
# by integer position on the same axes.
df = pd.read_csv("../../data/AirPassengers.csv").rename(
    columns={"#Passengers": "Passengers"}
)

# Preview the first rows.
df.head()

Unnamed: 0,Month,Passengers
0,1949-01,112
1,1949-02,118
2,1949-03,132
3,1949-04,129
4,1949-05,121


In [14]:
# List the current directory; the recorded output shows local_level.stan next to this notebook.
!ls

State-Space-Model-cmdstanpy.ipynb  local_level.stan


In [13]:
# Build the CmdStanModel from the Stan program stored next to this notebook.
# NOTE: the recorded run of this cell failed with
# "no CmdStan installation found", so CmdStan must be installed first.
stan_model = CmdStanModel(stan_file="./local_level.stan")

INFO:cmdstanpy:compiling stan program, exe file: /work/notebooks/cmdstanpy/local_level
INFO:cmdstanpy:compiler options: stanc_options=None, cpp_options=None


ValueError: no CmdStan installation found, run command line script "install_cmdstan"

In [None]:
# Observed series to model.
y = df["Passengers"]

M = 14  # forecast horizon: number of trailing observations held out

# Split into the training window and the held-out tail.
y_train = y[:-M]
y_test = y[-M:]

# Training length. Derived from the split rather than hard-coded so that it
# always equals len(y_train), whatever the number of rows in the data.
T = len(y_train)

In [None]:
# Hold out the last M observations as the test set and train on the rest
# (this repeats the split already made in the preceding cell).
y_train, y_test = y[:-M], y[-M:]

In [None]:
# Data passed to the Stan model: T (training length), M (forecast horizon)
# and Y (training observations).
# Y is converted from a pandas Series to a plain list so the dict serializes
# to JSON without relying on how the installed cmdstanpy handles pandas objects.
predict_dat = {"T": T, "M": M, "Y": y_train.tolist()}

In [None]:
# Run MCMC with cmdstanpy. CmdStanModel exposes sample(), not PyStan's
# sampling(), and its keyword names differ:
#   - PyStan's iter=3000 counts warmup too (half of it by default), so it maps
#     to 1500 warmup + 1500 sampling iterations here;
#   - PyStan's n_jobs=1 maps to parallel_chains=1.
fit_local_level = stan_model.sample(
    data=predict_dat,
    chains=1,
    parallel_chains=1,
    seed=10,
    iter_warmup=1500,
    iter_sampling=1500,
)

In [None]:
# Display the fit object as the cell's output.
fit_local_level

In [None]:
# Extract the sampling results as a dict of variable name -> array of draws.
# A cmdstanpy fit object has no PyStan-style extract(); stan_variables() is
# the equivalent and keeps the dict-style access used by the following cells.
ms_local_level = fit_local_level.stan_variables()

# Point forecast: mean of y_pred over the draws (axis 0).
y_pred = ms_local_level["y_pred"].mean(axis=0)

In [None]:
# 5th and 95th percentiles of y_pred across draws (axis 0), one row per
# time step after transposing, collected into a two-column table.
quantile = [5, 95]
colname = ["p5", "p95"]
per_5_95 = np.transpose(np.percentile(ms_local_level["y_pred"], q=quantile, axis=0))
df_pred = pd.DataFrame(data=per_5_95, columns=colname)

In [None]:
# Display the percentile table built in the previous cell.
df_pred

In [None]:
# Attach the predicted values (mean of y_pred over draws) as a new column.
df_pred["y_pred"] = y_pred

In [None]:
# State estimate: mean of mu over the draws (axis 0).
mu_hat = np.mean(ms_local_level["mu"], axis=0)

In [None]:
# Add the state estimates as a column. mu_hat is shorter than df_pred, so the
# column starts as NaN and only the leading len(mu_hat) rows are filled.
# The row range is derived from mu_hat itself instead of the hard-coded
# labels 0:129, so it stays aligned if the training length changes.
df_pred["mu_hat"] = np.nan
df_pred.loc[df_pred.index[: len(mu_hat)], "mu_hat"] = mu_hat

In [None]:
# Overlay observations, forecast and state estimate on one explicit Axes.
fig, ax = plt.subplots()

# Target variable (observed series), plotted against the integer row index.
df.plot(y="Passengers", legend=False, ax=ax)

# Forecast window: the last M rows, using the horizon constant instead of a
# repeated literal 14.
forecast = df_pred.iloc[-M:]
ax.plot(forecast[["p5", "p95"]], linestyle="dashed", color="purple")  # prediction interval
ax.plot(forecast["y_pred"], color="red")  # predicted values

# State estimate.
ax.plot(mu_hat, color="green")

ax.set(xlabel="Time step (row index)", ylabel="Passengers")
plt.show()