In [22]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [23]:
sys.path.insert(0, "../timeseries/")

In [105]:
import datetime
import pandas as pd
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

import warnings
warnings.filterwarnings("ignore")

In [25]:
coins = pd.read_csv("coins.csv", index_col=0)
coins.index = pd.to_datetime(coins.index, format="%d-%m-%Y").date
series = coins["frax"].dropna()

In [26]:
in_start = datetime.date(2021, 1, 1)
in_end = datetime.date(2021, 5, 31)
out_start = datetime.date(2022, 6, 1)
out_end = datetime.date(2022, 7, 1)

insample = series.loc[in_start:in_end]
outsample = series.loc[out_start:out_end]

In [104]:
def adf(series):
    result = adfuller(series)
    print('ADF Statistic: {}'.format(result[0]))
    print('p-value: {}'.format(result[1]))
    print('Critical Values:')
    for key, value in result[4].items():
        print('\t{}: {}'.format(key, value))

In [106]:
adf(series)

ADF Statistic: -8.450060811593868
p-value: 1.6717501497116346e-13
Critical Values:
	1%: -3.442081300525009
	5%: -2.8667149792729596
	10%: -2.569526100127551


In [28]:
arima_parameters = []
rmse = {}

train_size = int(len(insample))
train, test = series[0:train_size], series[train_size:]
history = [x for x in insample]

p_values = range(0,4)
d_values = [0,1]
q_values = range(0,4)
for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                arima_parameters.append(order)
                model = ARIMA(history, order=order)
                predictions = []
                for t in range(len(test)):
                    model_fit = model.fit()
                    yhat = model_fit.forecast()[0]
                    predictions.append(yhat)
                rmse[order] = mean_squared_error(test, predictions)

In [32]:
min_rmse = rmse[min(rmse, key=rmse.get)]
min(rmse, key=rmse.get), round(min_rmse,6)

((2, 1, 0), 2.1e-05)

In [107]:
max_rmse = rmse[max(rmse, key=rmse.get)]
max(rmse, key=rmse.get), round(max_rmse,6)

((2, 1, 3), 2.4e-05)

In [63]:
rmse_df = pd.DataFrame.from_dict(rmse, orient="index")

In [64]:
rmse_df = rmse_df.reset_index().rename(columns={"index":"order", 0:"RMSE"})

In [68]:
rmse_df.head()

Unnamed: 0,order,RMSE
0,"(0, 0, 0)",2.1e-05
1,"(0, 0, 1)",2.1e-05
2,"(0, 0, 2)",2.1e-05
3,"(0, 0, 3)",2.1e-05
4,"(0, 1, 0)",2.1e-05


### ARIMA model

In [69]:
# Information criteria per model
aic_dict = {}
bic_dict = {}
for parameters in arima_parameters:
    model = ARIMA(train, order=parameters)
    aic_dict[parameters] = model.fit().aic
    bic_dict[parameters] = model.fit().bic
min_aic = aic_dict[min(aic_dict, key=aic_dict.get)]
min_bic = bic_dict[min(bic_dict, key=bic_dict.get)]
print("Best AIC: " + str(min(aic_dict, key=aic_dict.get)) +": "+ str(int(min_aic)))
print("Best BIC: " + str(min(bic_dict, key=bic_dict.get)) +": "+ str(int(min_bic)))

Best AIC: (1, 0, 2): -1014
Best BIC: (0, 0, 0): -1004


In [73]:
train_size = int(len(insample))
train, test = series[0:train_size], series[train_size:]
history = [x for x in insample]

arima_parameters.append(order)
model = ARIMA(history, order=(2,1,0))
predictions = []
for t in range(len(test)):
    model_fit = model.fit()
    yhat = model_fit.forecast()[0]
    predictions.append(yhat)
rmse[order] = mean_squared_error(test, predictions)

In [94]:
model_fit.summary()

0,1,2,3
Dep. Variable:,y,No. Observations:,151.0
Model:,"ARIMA(2, 1, 0)",Log Likelihood,516.054
Date:,"Sat, 16 Jul 2022",AIC,-1026.108
Time:,00:10:30,BIC,-1017.076
Sample:,0,HQIC,-1022.439
,- 151,,
Covariance Type:,opg,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
ar.L1,-0.7501,0.091,-8.232,0.000,-0.929,-0.572
ar.L2,-0.2770,0.082,-3.396,0.001,-0.437,-0.117
sigma2,5.988e-05,5.82e-06,10.288,0.000,4.85e-05,7.13e-05

0,1,2,3
Ljung-Box (L1) (Q):,0.47,Jarque-Bera (JB):,10.17
Prob(Q):,0.49,Prob(JB):,0.01
Heteroskedasticity (H):,0.43,Skew:,-0.47
Prob(H) (two-sided):,0.0,Kurtosis:,3.87
