In [14]:
import pandas as pd
import numpy as np

from arch import arch_model
from statsmodels.tsa.arima.model import ARIMA

import warnings
warnings.filterwarnings('ignore')

%matplotlib inline

In [2]:
data = pd.read_csv('vodafone_data.csv')

In [3]:
data.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2016-02-11,29.33,29.51,29.030001,29.27,4805000
1,2016-02-12,29.52,29.950001,29.41,29.9,3116500
2,2016-02-16,29.950001,30.42,29.9,30.26,4037900
3,2016-02-17,31.0,31.32,30.9,31.09,4313800
4,2016-02-18,31.09,31.15,30.92,30.959999,3522800


## Підготовка даних

In [4]:
def dataset_preprocess(df):
    df['Date'] = pd.to_datetime(df['Date'])
    df['Date_index'] = pd.DatetimeIndex(df['Date']) 
    
    data_no_missing = df.copy(deep=True)

    data_no_missing = data_no_missing.set_index('Date_index').asfreq('D')

    for col in ['Open', 'High', 'Low', 'Close', 'Volume']:
        data_no_missing[col] = data_no_missing[col].interpolate()

    data_no_missing = data_no_missing.reset_index(drop=False)

    data_no_missing['Date'] = pd.to_datetime(data_no_missing['Date_index']).dt.date
    data_no_missing['year'] = pd.to_datetime(data_no_missing['Date']).dt.year
    data_no_missing['quarter'] = pd.to_datetime(data_no_missing['Date']).dt.quarter
    data_no_missing['month'] = pd.to_datetime(data_no_missing['Date']).dt.month
    
    return df, data_no_missing

In [5]:
df, data_no_missing = dataset_preprocess(data)

### Прогноз ціни на акції на кінець дня на один крок вперед

In [6]:
data_no_missing.head()

Unnamed: 0,Date_index,Date,Open,High,Low,Close,Volume,year,quarter,month
0,2016-02-11,2016-02-11,29.33,29.51,29.030001,29.27,4805000.0,2016,1,2
1,2016-02-12,2016-02-12,29.52,29.950001,29.41,29.9,3116500.0,2016,1,2
2,2016-02-13,2016-02-13,29.6275,30.067501,29.5325,29.99,3346850.0,2016,1,2
3,2016-02-14,2016-02-14,29.735,30.185001,29.655,30.08,3577200.0,2016,1,2
4,2016-02-15,2016-02-15,29.842501,30.3025,29.7775,30.17,3807550.0,2016,1,2


In [12]:
REAL_CLOSE_NEXT_1_STEP = 18.70

In [8]:
model = ARIMA(data_no_missing['Close'], order=(8, 2, 9))

In [9]:
model_fitted = model.fit()

In [13]:
print(f'Predicted Value: {model_fitted.forecast().values[0]}, Real value: {REAL_CLOSE_NEXT_1_STEP}')

Predicted Value: 18.76108362000851, Real value: 18.7


### Прогноз наступного значення дисперсії на 1 крок

In [15]:
CLOSING_VALUES = data_no_missing.Close.tolist() + [REAL_CLOSE_NEXT_1_STEP]

In [16]:
REAL_VARIANCE_NON_STATIONARY = pd.Series(CLOSING_VALUES).std() ** 2
REAL_VARIANCE_NON_STATIONARY

35.8778397721716

In [17]:
def to_stationary(df, target_column='Close'):
    ts_log = np.log(df[target_column])
    ts_diff = ts_log.diff(periods=1).dropna()
    stationary_ts = ts_diff.diff(periods=1).dropna()
    
    return stationary_ts

In [18]:
df_stationary = to_stationary(data_no_missing)

In [19]:
REAL_VARIANCE_STATIONARY = pd.Series(df_stationary).std() ** 2
REAL_VARIANCE_STATIONARY

0.0003568731924588563

In [20]:
model_arhc = arch_model(np.power(df_stationary, 2), p=17, q=6, vol='GARCH')

In [21]:
model_arhc_fitted = model_arhc.fit()

Iteration:      1,   Func. Count:     27,   Neg. LLF: 4989613007135304.0
Iteration:      2,   Func. Count:     62,   Neg. LLF: -8551.299306739225
Inequality constraints incompatible    (Exit mode 4)
            Current function value: -8551.299297552061
            Iterations: 2
            Function evaluations: 62
            Gradient evaluations: 2


Inequality constraints incompatible
See scipy.optimize.fmin_slsqp for code meaning.



In [22]:
model_arhc_fitted.summary()

0,1,2,3
Dep. Variable:,Close,R-squared:,0.0
Mean Model:,Constant Mean,Adj. R-squared:,0.0
Vol Model:,GARCH,Log-Likelihood:,8551.3
Distribution:,Normal,AIC:,-17052.6
Method:,Maximum Likelihood,BIC:,-16914.9
,,No. Observations:,1826.0
Date:,"Sun, Mar 21 2021",Df Residuals:,1825.0
Time:,23:15:27,Df Model:,1.0

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
mu,-1.6094e-03,1.270e-10,-1.268e+07,0.000,"[-1.609e-03,-1.609e-03]"

0,1,2,3,4,5
,coef,std err,t,P>|t|,95.0% Conf. Int.
omega,4.9156e-08,8.046e-14,6.109e+05,0.000,"[4.916e-08,4.916e-08]"
alpha[1],0.0118,1.229e-02,0.957,0.338,"[-1.232e-02,3.585e-02]"
alpha[2],0.0118,3.380e-02,0.348,0.728,"[-5.447e-02,7.800e-02]"
alpha[3],0.0118,4.414e-02,0.267,0.790,"[-7.474e-02,9.827e-02]"
alpha[4],0.0118,6.637e-02,0.177,0.859,"[ -0.118, 0.142]"
alpha[5],0.0118,5.328e-02,0.221,0.825,"[-9.266e-02, 0.116]"
alpha[6],0.0118,6.903e-02,0.170,0.865,"[ -0.124, 0.147]"
alpha[7],0.0118,5.448e-02,0.216,0.829,"[-9.501e-02, 0.119]"
alpha[8],0.0118,3.054e-02,0.385,0.700,"[-4.809e-02,7.162e-02]"


In [25]:
print(f"""Predicted Value: {model_arhc_fitted.forecast(horizon=1).variance.values[-1][0]},
      Real Value: {REAL_VARIANCE_STATIONARY}""")

Predicted Value: 5.168567792006029e-06,
      Real Value: 0.0003568731924588563
