# M3 monthly series with exogenous regressors (full prediction)

In [53]:
import numpy as np
import pyramid
import pandas as pd
import math
from scipy.stats import norm
import statsmodels.api as sm
import matplotlib.pyplot as plt
from datetime import datetime
import requests
from io import BytesIO
from sklearn import metrics
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import Legend
import pandas as pd
from bokeh.models import Span
from pyramid.arima import auto_arima

In [54]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    Both inputs are converted to NumPy arrays first, so plain lists and
    tuples are accepted and pandas objects are compared position by
    position instead of being re-aligned on their index labels.

    Parameters
    ----------
    y_true : array-like
        Observed values. A zero entry makes the corresponding ratio infinite
        (or NaN when the matching prediction is zero as well), and that
        value propagates into the result.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean(|(y_true - y_pred) / y_true|) * 100``.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [55]:
# Load the input table; the first CSV column is used as the row index.
my_df = pd.read_csv('my_data_frame.csv',index_col=0)

In [56]:
# Split the table by column position, cast to float32 and transpose, so that
# rows index observations and columns index series (the layout fitters() and
# predictions() read via shape[0] / shape[1]).
# Columns 6..55 form the training window, columns 56..73 the hold-out window.
train_data = my_df.iloc[:, 6:56].values.astype(np.float32).T
test_data = my_df.iloc[:, 56:74].values.astype(np.float32).T

In [57]:
def fitters(train_data, random=True):
    """Fit one seasonal ARIMA model per series, using the other series as regressors.

    For series ``i`` the target is ``train_data[1:, i]`` and the exogenous
    matrix is ``train_data[:-1]`` with column ``i`` removed, i.e. every
    observation is paired with the previous row of all the other series.

    Parameters
    ----------
    train_data : numpy.ndarray
        2-D array indexed as ``[observation, series]``.
    random : bool, default True
        If truthy, run the non-stepwise randomised search
        (``random=True, random_state=42, n_fits=200, n_jobs=-1``);
        otherwise run the stepwise search.

    Returns
    -------
    list
        The objects returned by ``auto_arima``, one per column of
        ``train_data``, in column order. Empty when there are no columns.
    """
    # Order bounds and error handling shared by both search strategies; kept
    # in one place so the two strategies cannot drift apart.
    search_kwargs = dict(
        start_p=1, start_q=1, max_p=5, max_q=5, m=12,
        start_P=0, max_d=4, max_D=2, max_P=2, max_Q=2, trace=False,
        error_action='ignore',   # don't want to know if an order does not work
        suppress_warnings=True,  # don't want convergence warnings
    )
    if random:
        # Random search over the order grid (not exhaustive), fixed seed.
        search_kwargs.update(stepwise=False, random=True, random_state=42,
                             n_fits=200, n_jobs=-1)
    else:
        search_kwargs.update(stepwise=True)

    # Regressors are the rows preceding each target row (one-step lag).
    lagged = train_data[:-1]

    models = []
    for i in range(train_data.shape[1]):
        X = np.delete(lagged, i, axis=1)  # every series except the target
        y = train_data[1:, i]
        models.append(auto_arima(y, exogenous=X, **search_kwargs))
    return models

In [58]:
def predictions(n_periods, train_data, models):
    """Forecast every series ``n_periods`` steps ahead, one step at a time.

    At each step, the model of series ``i`` is asked for a single-step
    forecast and is given, as exogenous input, the most recent values of all
    the *other* series: the last training row for the first step, and the
    previous row of forecasts afterwards.

    Parameters
    ----------
    n_periods : int
        Number of steps to forecast.
    train_data : numpy.ndarray
        2-D array indexed as ``[observation, series]``; only its last row
        and its number of columns are used.
    models : sequence
        One fitted model per column of ``train_data``. Each must provide
        ``predict(n_periods=..., exogenous=...)`` returning a length-1
        array-like.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_periods, n_series)`` holding the forecasts.
    """
    n_series = train_data.shape[1]
    forecasts = np.empty((n_periods, n_series))
    column_ids = np.arange(n_series)

    for step in range(n_periods):
        # Regressors come from the last known row: observed data for the
        # first step, our own previous forecasts after that.
        previous = train_data[-1] if step == 0 else forecasts[step - 1]
        for i in range(n_series):
            exog = previous[column_ids != i].reshape(1, -1)
            # NOTE(review): every call requests a single step; whether the
            # model's internal time position advances between calls depends
            # on the model implementation -- confirm this is intended.
            one_step = models[i].predict(n_periods=1, exogenous=exog)
            # predict() returns a length-1 container; store its scalar so the
            # row assignment does not rely on implicit (N, 1) -> (N,) coercion.
            forecasts[step, i] = np.ravel(one_step)[0]
    return forecasts

In [59]:
# Fit one model per series using the stepwise search (random=False).
models = fitters(train_data,random=False)

In [69]:
# Forecast 18 steps past the end of the training window, one column per series.
predictions_ts = predictions(n_periods=18,train_data=train_data,models=models)

In [71]:
# Direct Bokeh output to the notebook so show() renders figures inline.
output_notebook()

def plot_arima(truth, forecasts, title="ARIMA", xaxis_label='Time',
               yaxis_label='Value', c1='#A6CEE3', c2='#B2DF8A',
               forecast_start=None, **kwargs):
    """Build a Bokeh figure with the observed series followed by its forecasts.

    Parameters
    ----------
    truth : array-like with ``.shape``
        Observed values; plotted at x = 0 .. len(truth) - 1.
    forecasts : array-like with ``.shape``
        Forecast values; plotted at consecutive x positions starting at
        ``forecast_start``.
    title, xaxis_label, yaxis_label : str
        Figure title and axis labels.
    c1, c2 : str
        Line colours for the observed and the forecast series respectively.
    forecast_start : int, optional
        x position of the first forecast. Defaults to ``len(truth)``, i.e.
        directly after the observed data.
    **kwargs
        Forwarded to ``bokeh.plotting.figure``.

    Returns
    -------
    The figure object (not yet shown).
    """
    n_truth = truth.shape[0]
    n_forecasts = forecasts.shape[0]

    # always plot truth the same
    truth = pd.Series(truth, index=np.arange(n_truth))

    # if no defined forecast start, start at the end of the observed data
    if forecast_start is None:
        forecast_start = n_truth
    # The index must have exactly one entry per forecast, wherever it starts.
    idx = np.arange(forecast_start, forecast_start + n_forecasts)
    forecasts = pd.Series(forecasts, index=idx)

    # set up the plot
    p = figure(title=title, plot_height=400, **kwargs)
    p.grid.grid_line_alpha = 0.3
    p.xaxis.axis_label = xaxis_label
    p.yaxis.axis_label = yaxis_label

    # add the lines
    p.line(forecasts.index, forecasts.values, color=c2, legend='Forecasted')
    p.line(truth.index, truth.values, color=c1, legend='Observed')

    # Red vertical marker at the last x position before the forecasts begin
    # (x = 49 for a 50-point observed series with the default start).
    vline = Span(location=forecast_start - 1, dimension='height',
                 line_color='red', line_width=1)
    p.renderers.extend([vline])

    return p

def plot_arima_comparison(truth_forecast, forecasts, title="Comparison", xaxis_label='Time',
               yaxis_label='Value', c1='#A6CEE3', c2='#B2DF8A', 
               forecast_start=None, **kwargs):
    """Build a Bokeh figure overlaying forecasts on the observed values.

    Both inputs are wrapped in ``pd.Series`` with the default index, so the
    two curves share the same x positions starting at 0.

    Parameters
    ----------
    truth_forecast : array-like
        Observed values for the forecast window (drawn in ``c1``, width 3).
    forecasts : array-like
        Forecast values (drawn in ``c2``, width 2).
    title, xaxis_label, yaxis_label : str
        Figure title and axis labels.
    forecast_start : optional
        Accepted but not used by this function.
    **kwargs
        Forwarded to ``bokeh.plotting.figure``.

    Returns
    -------
    The figure object (not yet shown), with a legend placed to its right.
    """
    predicted = pd.Series(forecasts)
    observed = pd.Series(truth_forecast)

    # Canvas and axis labelling.
    fig = figure(title=title, plot_height=600, plot_width=1000, **kwargs)
    fig.grid.grid_line_alpha = 0.3
    fig.xaxis.axis_label = xaxis_label
    fig.yaxis.axis_label = yaxis_label

    # Keep the renderers so the legend can be attached outside the plot area.
    predicted_line = fig.line(predicted.index, predicted.values,
                              color=c2, line_width=2)
    observed_line = fig.line(observed.index, observed.values,
                             color=c1, line_width=3)

    legend_items = [
        ('Forecasted_my_Stepwise', [predicted_line]),
        ('Observed', [observed_line]),
    ]
    fig.add_layout(Legend(items=legend_items, location=(0, -30)), 'right')
    return fig

In [73]:
# Show observed vs. forecast curves for the first 15 series, one figure each.
for i in range(15):
    comparison = plot_arima_comparison(test_data[:, i], predictions_ts[:, i])
    show(comparison)

In [None]:
# Print the MAPE of several forecasting approaches against the same targets.
# NOTE(review): this cell has no execution count, and none of validate_data,
# next_validate, next_validate_force, next_validate_rs, forecast_Theta,
# forecast_FPro or forecast_FX is defined in the visible part of this
# notebook -- presumably carried over from another notebook; confirm that
# these names exist before running.
print ('MAPE (Stepwise)      :', mean_absolute_percentage_error(validate_data,next_validate))
print ('MAPE (Stepwise force):', mean_absolute_percentage_error(validate_data,next_validate_force))
print ('MAPE (RS)            :', mean_absolute_percentage_error(validate_data,next_validate_rs))
print ('MAPE (Theta)         :', mean_absolute_percentage_error(validate_data,forecast_Theta))
print ('MAPE (FPro)          :', mean_absolute_percentage_error(validate_data, forecast_FPro))
print ('MAPE (FX)            :', mean_absolute_percentage_error(validate_data,forecast_FX))

In [74]:
# Display the fitted models (their repr lists the selected orders).
models

[ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(0, 0, 0),
    out_of_sample_size=0, scoring='mse', scoring_args={},
    seasonal_order=(1, 0, 1, 12), solver='lbfgs', start_params=None,
 ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(0, 0, 0),
    out_of_sample_size=0, scoring='mse', scoring_args={},
    seasonal_order=(1, 0, 0, 12), solver='lbfgs', start_params=None,
 ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(0, 0, 1),
    out_of_sample_size=0, scoring='mse', scoring_args={},
    seasonal_order=(0, 0, 0, 12), solver='lbfgs', start_params=None,
 ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(1, 1, 0),
    out_of_sample_size=0, scoring='mse', scoring_args={},
    seasonal_order=(1, 0, 0, 12), solver='lbfgs', start_params=None,
 ARIMA(callback=None, disp=0, maxiter=50, method=None, order=(0, 0, 1),
    out_of_sample_size=0, scoring='mse', scoring_args={},
    seasonal_order=(1, 0, 0, 12), solver='lbfgs', start_params=None,
 ARIM