In [None]:
# 1. Evaluate ARIMA Model

# This approach involves the following steps:

# 1. Split the dataset into training and test sets.
# 2. Walk the time steps in the test dataset.
#   2.1. Train an ARIMA model.
#   2.2. Make a one-step prediction.
#   2.3. Store prediction; get and store actual observation.
# 3. Calculate an error score for the predictions compared to the expected values.

# We can implement this in Python as a new standalone function called evaluate_arima_model() that takes a time series dataset as input as well as a tuple with the p, d, and q parameters for the model to be evaluated.

# Import libs
# The dataset is split in two: 66% for the initial training dataset and the remaining 34% for the test dataset.


In [1]:
# 2. Iterate ARIMA Parameters

# It is recommended to ignore warnings for this code to avoid a lot of noise while the procedure runs. This can be done as follows:
import warnings
# Blanket "ignore" filter: no message, category or module restriction is given,
# so warnings of every kind are suppressed from this point on.
warnings.filterwarnings("ignore")

In [4]:
# grid search ARIMA parameters for time series
import warnings
from math import sqrt
from pandas import read_csv
from pandas import datetime
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# evaluate an ARIMA model for a given order (p,d,q)
def evaluate_arima_model(X, arima_order, train_fraction=0.66):
    """Walk-forward validate one ARIMA order and return its out-of-sample RMSE.

    The leading ``train_fraction`` of ``X`` seeds the history. For every
    remaining observation a fresh ARIMA model is fitted on the history, its
    one-step forecast is stored, and the true observation is then appended to
    the history before the next step.

    Args:
        X: Ordered observations supporting ``len()``, slicing and iteration
            (e.g. a list, a NumPy array or a pandas Series).
        arima_order: ``(p, d, q)`` tuple passed to ``ARIMA(order=...)``.
        train_fraction: Share of ``X`` used as the initial training window.
            Defaults to 0.66, the split this function previously hard-coded.

    Returns:
        float: Root mean squared error between the held-out observations and
        the one-step forecasts.

    Raises:
        ValueError: If the split leaves the training or the test part empty.
    """
    # prepare training dataset
    train_size = int(len(X) * train_fraction)
    train, test = X[0:train_size], X[train_size:]
    if len(train) == 0 or len(test) == 0:
        raise ValueError(
            "train/test split is empty: %d observations with train_fraction=%r"
            % (len(X), train_fraction)
        )
    history = list(train)
    # make predictions
    predictions = []
    # Iterate over the held-out values instead of indexing test[t]: on a pandas
    # Series with an integer index, test[t] is a label lookup and fails because
    # the labels of the test slice start at train_size, not at 0.
    for observed in test:
        model_fit = ARIMA(history, order=arima_order).fit()
        predictions.append(model_fit.forecast()[0])
        # The true value (not the forecast) extends the history for the next step.
        history.append(observed)
    # calculate out of sample error
    return sqrt(mean_squared_error(test, predictions))

# evaluate combinations of p, d and q values for an ARIMA model
def evaluate_models(dataset, p_values, d_values, q_values):
    """Grid-search ARIMA orders, printing each RMSE and the best configuration.

    Every ``(p, d, q)`` combination is scored with ``evaluate_arima_model``.
    Combinations whose evaluation raises an ordinary exception are skipped
    without output; the best (lowest RMSE) order is printed at the end.

    Args:
        dataset: Object with an ``astype`` method (e.g. a NumPy array); it is
            converted to ``float32`` before evaluation.
        p_values: Iterable of candidate ``p`` values.
        d_values: Re-iterable collection of candidate ``d`` values.
        q_values: Re-iterable collection of candidate ``q`` values.

    Returns:
        None. Results are reported through ``print`` only.
    """
    dataset = dataset.astype('float32')
    best_score, best_cfg = float("inf"), None
    for p in p_values:
        for d in d_values:
            for q in q_values:
                order = (p, d, q)
                try:
                    rmse = evaluate_arima_model(dataset, order)
                except Exception:
                    # Best effort: a configuration that cannot be evaluated is
                    # skipped. Only Exception is caught so that
                    # KeyboardInterrupt / SystemExit still stop the search.
                    continue
                if rmse < best_score:
                    best_score, best_cfg = rmse, order
                print('ARIMA%s RMSE=%.3f' % (order, rmse))
    print('Best ARIMA%s RMSE=%.3f' % (best_cfg, best_score))

# load dataset
def parser(x):
    """Parse a day-first ``DD/MM/YYYY`` date string into a ``datetime``.

    Args:
        x: Date string such as ``"25/12/2020"``.

    Returns:
        datetime.datetime: The parsed date (time part is midnight).

    Raises:
        ValueError: If ``x`` does not match the ``%d/%m/%Y`` format.
    """
    # Function-scope import of the standard-library class, so this helper does
    # not depend on the `datetime` alias imported from pandas at the top of the cell.
    from datetime import datetime
    return datetime.strptime(x, "%d/%m/%Y")
# Read the CSV with its first column as a date index parsed by `parser`.
# `.squeeze("columns")` replaces the deprecated `squeeze=True` keyword of
# read_csv: a single data column is returned as a Series.
# NOTE(review): presumably the file holds one date column and one value column - confirm.
series = read_csv(
    './input/McDonald_Test.csv',
    header=0,
    index_col=0,
    parse_dates=True,
    date_parser=parser,
).squeeze("columns")

# Convert the datetime index to monthly periods.
series.index = series.index.to_period('M')

# evaluate parameters
p_values = [0, 1, 2, 4, 6, 8, 10]
d_values = range(0, 3)
q_values = range(0, 3)
# Silence warnings right before the search so the per-model output stays readable.
warnings.filterwarnings("ignore")
# Only the raw values are passed on; the index is not used by the grid search.
evaluate_models(series.values, p_values, d_values, q_values)

ARIMA(0, 0, 0) RMSE=0.772
ARIMA(0, 0, 1) RMSE=0.577
ARIMA(0, 0, 2) RMSE=0.656
ARIMA(0, 1, 0) RMSE=0.508
ARIMA(0, 1, 1) RMSE=0.595
ARIMA(0, 1, 2) RMSE=0.481
ARIMA(0, 2, 0) RMSE=0.792
ARIMA(0, 2, 1) RMSE=0.528
ARIMA(0, 2, 2) RMSE=0.566
ARIMA(1, 0, 0) RMSE=0.498
ARIMA(1, 0, 1) RMSE=0.541
ARIMA(1, 0, 2) RMSE=0.476
ARIMA(1, 1, 0) RMSE=0.521
ARIMA(1, 1, 1) RMSE=0.550
ARIMA(1, 1, 2) RMSE=0.486
ARIMA(1, 2, 0) RMSE=0.734
ARIMA(1, 2, 1) RMSE=0.537
ARIMA(1, 2, 2) RMSE=0.548
ARIMA(2, 0, 0) RMSE=0.510
ARIMA(2, 0, 1) RMSE=0.525
ARIMA(2, 0, 2) RMSE=0.481
ARIMA(2, 1, 0) RMSE=0.470
ARIMA(2, 1, 1) RMSE=0.468
ARIMA(2, 1, 2) RMSE=0.487
ARIMA(2, 2, 0) RMSE=0.648
ARIMA(2, 2, 1) RMSE=0.468
ARIMA(2, 2, 2) RMSE=0.453
ARIMA(4, 0, 0) RMSE=0.462
ARIMA(4, 0, 1) RMSE=0.487
ARIMA(4, 0, 2) RMSE=0.487
ARIMA(4, 1, 0) RMSE=0.465
ARIMA(4, 1, 1) RMSE=0.519
ARIMA(4, 1, 2) RMSE=0.503
ARIMA(4, 2, 0) RMSE=0.519
ARIMA(4, 2, 1) RMSE=0.467
ARIMA(4, 2, 2) RMSE=0.518
ARIMA(6, 0, 0) RMSE=0.483
ARIMA(6, 0, 1) RMSE=0.495
ARIMA(6, 0, 

In [None]:
# Best ARIMA(2, 2, 2) RMSE=0.453

