# ARIMA

In [4]:
! pip install statsmodels



In [5]:
%matplotlib inline

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime

%config InlineBackend.figure_format = 'retina'

In [8]:
import pandas_datareader as pdr

df = pdr.get_data_yahoo('^NSEBANK', '2001-1-1', '2021-10-12')

df.head()

Unnamed: 0_level_0,High,Low,Open,Close,Volume,Adj Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2007-09-17,6977.200195,6843.0,6898.0,6897.100098,0.0,6897.100098
2007-09-18,7078.950195,6883.600098,6921.149902,7059.649902,0.0,7059.649902
2007-09-19,7419.350098,7111.0,7111.0,7401.850098,0.0,7401.850098
2007-09-20,7462.899902,7343.600098,7404.950195,7390.149902,0.0,7390.149902
2007-09-21,7506.350098,7367.149902,7378.299805,7464.5,0.0,7464.5


In [9]:
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values('Date', inplace=True)
df.set_index('Date', inplace=True)

print(df.shape)

df.head()

KeyError: 'Date'

In [None]:
df_week = df.resample('w').mean()
df_week = df_week[['close']]
df_week.head()

In [None]:
# Lets create a comn for weekly returns. Take the log of the returns to normalize large functions

In [None]:
df_week['weekly_ret'] = np.log(df_week['close']).diff()
df_week.head()

In [None]:
# drop null rows
df_week.dropna(inplace=True)

In [None]:
df_week.weekly_ret.plot(kind='line', figsize=(12, 6));

In [None]:
udiff = df_week.drop(['close'], axis=1)
udiff.head()

In [None]:
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

In [None]:
rolmean = udiff.rolling(20).mean()
rolstd = udiff.rolling(20).std()

In [None]:
plt.figure(figsize=(12, 6))
orig = plt.plot(udiff, color='blue', label='Original')
mean = plt.plot(rolmean, color='red', label='Rolling Mean')
std = plt.plot(rolstd, color='black', label = 'Rolling Std Deviation')
plt.title('Rolling Mean & Standard Deviation')
plt.legend(loc='best')
plt.show(block=False)

In [None]:
# Perform Dickey-Fuller test
dftest = sm.tsa.adfuller(udiff.weekly_ret, autolag='AIC')
dfoutput = pd.Series(dftest[0:4], index=['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used'])
for key, value in dftest[4].items():
    dfoutput['Critical Value ({0})'.format(key)] = value
    
dfoutput

In [None]:
# ACF and PACF

In [None]:
from statsmodels.graphics.tsaplots import plot_acf

# the autocorrelation chart provides just the correlation at increasing lags
fig, ax = plt.subplots(figsize=(12,5))
plot_acf(udiff.values, lags=10, ax=ax)
plt.show()

In [None]:
from statsmodels.graphics.tsaplots import plot_pacf

fig, ax = plt.subplots(figsize=(12,5))
plot_pacf(udiff.values, lags=10, ax=ax)
plt.show()

In [None]:
# Build ARIMA Model

In [None]:
from statsmodels.tsa.arima_model import ARMA

# Notice that you have to use udiff - the differenced data rather than the original data. 
ar1 = ARMA(tuple(udiff.values), (3, 1)).fit()
ar1.summary()

In [None]:
plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue')
preds = ar1.fittedvalues
plt.plot(preds, color='red')
plt.show()

In [None]:
steps = 2

forecast = ar1.forecast(steps=steps)[0]

plt.figure(figsize=(12, 8))
plt.plot(udiff.values, color='blue')

preds = ar1.fittedvalues
plt.plot(preds, color='red')

plt.plot(pd.DataFrame(np.array([preds[-1],forecast[0]]).T,index=range(len(udiff.values)+1, len(udiff.values)+3)), color='green')
plt.plot(pd.DataFrame(forecast,index=range(len(udiff.values)+1, len(udiff.values)+1+steps)), color='green')
plt.title('Display the predictions with the ARIMA model')
plt.show()