##### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import datetime
from statsmodels.tsa.ar_model import AutoReg
import warnings
from sklearn.metrics import mean_absolute_percentage_error, mean_absolute_error, mean_squared_error

%matplotlib inline
warnings.filterwarnings('ignore')

##### Definition of Helper Functions

In [2]:
def error_table(model_name, y_real, y_pred):
    """Summarise forecast accuracy as a one-row DataFrame.

    Parameters
    ----------
    model_name : label stored in the 'Model Name' column and used as the row index.
    y_real : observed values, passed unchanged to the sklearn metric functions.
    y_pred : predicted values, passed unchanged to the sklearn metric functions.

    Returns
    -------
    pd.DataFrame
        A single row indexed by ``model_name`` with the columns
        'Model Name', 'MAE', 'MAPE' and 'RMSE'.
    """
    metrics = {
        'Model Name': model_name,
        'MAE': mean_absolute_error(y_real, y_pred),
        'MAPE': mean_absolute_percentage_error(y_real, y_pred),
        # RMSE is the square root of the mean squared error.
        'RMSE': np.sqrt(mean_squared_error(y_real, y_pred)),
    }
    return pd.DataFrame(metrics, index=[model_name])

In [3]:
# Load the weekly sales per store; 'Date' is parsed as datetime and used as the index.
df = pd.read_csv(
    "../data/week_sales.csv",
    usecols=['Date', 'Store', 'Sales'],
    parse_dates=['Date'],
    index_col='Date',
)
display(df)

Unnamed: 0_level_0,Store,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2012-12-31,1,10.185541
2012-12-31,2,10.007082
2012-12-31,3,10.479117
2012-12-31,4,10.798126
2012-12-31,5,9.939964
...,...,...
2015-07-27,1111,10.029547
2015-07-27,1112,10.546420
2015-07-27,1113,10.235988
2015-07-27,1114,11.545025


##### Create separate train and test time series for each store:

In [4]:
# Hold out the last TEST_WEEKS weekly observations of every store as the test set.
TEST_WEEKS = 8
# The cutoff date itself belongs to the test set (">=" below), so step back
# TEST_WEEKS - 1 weeks from the most recent date. max() is used instead of
# index[-1] so the cutoff does not depend on the row order of the CSV.
test_date = df.index.max() - datetime.timedelta(weeks=TEST_WEEKS - 1)

y_train = []
y_test = []
# groupby yields the stores in ascending order, each with its rows in their
# original order, and splits the frame in one pass instead of building a
# full-length boolean mask per store.
for store, store_df in df.groupby('Store'):
    # one time series per store: everything before the cutoff is training data
    y_train.append(store_df[store_df.index < test_date])
    y_test.append(store_df[store_df.index >= test_date])

##### Autoregressive Model AR(1)

In [5]:
# Use the first store (position 0 of the per-store lists) as the worked example.
y_train_0, y_test_0 = y_train[0]['Sales'], y_test[0]['Sales']

In [6]:
# AR(1): regress each observation on the single preceding observation.
ar_model = AutoReg(endog=y_train_0, lags=1)
ar_results = ar_model.fit()

In [7]:
# Forecast one value per held-out observation, starting at the first position
# after the end of the training series.
n_train, n_test = len(y_train_0), len(y_test_0)
y_pred_0 = ar_results.predict(
    start=n_train,
    end=n_train + n_test - 1,
    dynamic=False,
)

In [8]:
# Inspect the training series of the first store.
y_train_0

Date
2012-12-31    10.185541
2013-01-07    10.325285
2013-01-14    10.190770
2013-01-21    10.365112
2013-01-28    10.363157
                ...    
2015-05-04    10.212185
2015-05-11     9.972780
2015-05-18    10.019536
2015-05-25    10.255130
2015-06-01    10.122543
Name: Sales, Length: 127, dtype: float64

In [9]:
# Inspect the AR(1) forecasts for the held-out period.
y_pred_0

2015-06-08    10.193738
2015-06-15    10.212730
2015-06-22    10.217797
2015-06-29    10.219149
2015-07-06    10.219509
2015-07-13    10.219606
2015-07-20    10.219631
2015-07-27    10.219638
Freq: W-MON, dtype: float64

In [10]:
# Inspect the held-out actual values for comparison with the forecasts.
y_test_0

Date
2015-06-08    10.118196
2015-06-15    10.152026
2015-06-22    10.050829
2015-06-29    10.320156
2015-07-06    10.074790
2015-07-13    10.236023
2015-07-20    10.125190
2015-07-27     9.907330
Name: Sales, dtype: float64

In [11]:
# np.expm1 undoes a log1p transform before scoring — presumably 'Sales' was
# stored as log1p(sales); confirm against the data preparation step.
error_table(
    model_name='AR(1)',
    y_real=np.expm1(y_test_0),
    y_pred=np.expm1(y_pred_0),
)

Unnamed: 0,Model Name,MAE,MAPE,RMSE
AR(1),AR(1),3082.337745,0.132064,3652.68939
