In [1]:
%load_ext nb_black

<IPython.core.display.Javascript object>

In [None]:
import numpy as np
import pandas as pd
import plotly.express as px
import statsmodels.api as sm
from fbprophet import Prophet
from sklearn.model_selection import train_test_split
from statsmodels.tsa.arima_model import ARMA
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller

In [None]:
# Read the MSFT price CSV straight from its hosted location.
data = pd.read_csv(
    'https://tf-assets-prod.s3.amazonaws.com/tf-curric/data-science/Data%20Sets%20Time%20Series%20Analysis/Time%20Series%20-%20Day%204/MSFT_data.csv'
)

# Convert the raw 'date' column to datetimes before the columns are relabelled.
data['date'] = pd.to_datetime(data['date'])

# Positional relabel: this assumes the file has exactly these seven columns
# in this order -- TODO confirm against the CSV header.
data.columns = ['Date', 'Open', 'High', 'Low', 'Observed', 'Volume', 'Ticker']

# Hold out the final 30 rows for testing; everything before them is training data.
train, test = data.iloc[:-30], data.iloc[-30:]

In [None]:
# def test_split(data, test_size=0.20):
  
#   ts = int(len(data) * test_size)

#   train = data.iloc[:-ts]
#   test = data.iloc[-ts:]

#   return train, test

In [None]:
# train, test = test_split(data, test_size=0.30)

In [None]:
# Chronological hold-out: shuffle=False keeps the row order, so the final 30
# rows become the test set. Each part is copied so that columns can be added
# to `train` later without writing into a view of `data`.
train, test = (
    part.copy()
    for part in train_test_split(data, test_size=30, shuffle=False)
)

In [None]:
# Display the last five training rows (the rows just before the hold-out window).
train.tail(n=5)

In [None]:
# Display the first five hold-out rows.
test.head(n=5)

In [None]:
def ilinechart(df, x, y, groups=None, title=''):
    """Render an interactive Plotly Express line chart and display it.

    Args:
        df: Data passed to ``px.line``.
        x: Column used for the x axis.
        y: Column used for the y axis.
        groups: Optional column forwarded as ``color`` so each distinct
            value is drawn as its own line.
        title: Chart title; it is centred horizontally.

    Returns:
        None. The figure is shown as a side effect.
    """
    fig = px.line(df, x=x, y=y, color=groups, title=title, template='none')

    # x=0.5 places the title at the horizontal midpoint of the figure.
    fig = fig.update(layout=dict(title=dict(x=0.5)))

    fig.show()

def multiline_chart(df, x, y, groups=None, title=''):
    """Render a multi-series Plotly Express line chart with clean legend labels.

    Works like ``ilinechart`` but, before showing the figure, removes a
    leading ``"<groups>="`` prefix from every trace name so the legend shows
    only the group value.

    Args:
        df: Data passed to ``px.line``.
        x: Column used for the x axis.
        y: Column used for the y axis.
        groups: Optional column forwarded as ``color``; one line per value.
        title: Chart title; it is centred horizontally.

    Returns:
        None. The figure is shown as a side effect.
    """
    fig = px.line(df, x=x, y=y, color=groups, title=title,
                  template='none').update(layout=dict(title=dict(x=0.5)))

    prefix = f'{groups}='
    for trace in fig.data:
        label = trace.name or ''
        # Strip the prefix only when it is really there. Trace names without
        # an '=' (no grouping column, or a plotly version that already emits
        # the bare value) are left alone instead of raising IndexError, and
        # values that themselves contain '=' are kept whole.
        if groups is not None and label.startswith(prefix):
            label = label[len(prefix):]
        trace.update(name=label)

    fig.show()

In [None]:
# Plot the training series against its date column.
ilinechart(train, x='Date', y='Observed', title='Observations Over Time')

In [None]:
def ts_decomp(data, time_field, obs_values, model='additive', freq=None, viz='separate'):
    """Decompose a series into trend, seasonality and residuals, then chart them.

    Args:
        data: DataFrame containing at least ``time_field`` and ``obs_values``.
        time_field: Name of the time column; it becomes the index of the
            series handed to ``seasonal_decompose`` and the x axis of every
            chart.
        obs_values: Name of the column holding the observed values.
        model: Forwarded to ``sm.tsa.seasonal_decompose`` (``'additive'`` by
            default).
        freq: Forwarded to ``sm.tsa.seasonal_decompose`` as ``freq``.
        viz: ``'combined'`` draws observed, trend, seasonality, residuals and
            trend+seasonality on a single chart; any other value draws three
            separate charts (trend, seasonality, residuals).

    Returns:
        None. Charts are shown as a side effect.

    Note:
        The code indexes the decomposition outputs by ``obs_values``, so it
        assumes each component comes back as a DataFrame with that column --
        TODO confirm for the installed statsmodels version.
    """
    # Index by the caller-supplied time column rather than a fixed name.
    series = data[[time_field, obs_values]].set_index(time_field)
    decomposition = sm.tsa.seasonal_decompose(series, model=model, freq=freq)

    # Keep only rows where a trend value exists.
    trend = decomposition.trend
    trend = trend[trend[obs_values].notna()].reset_index()
    seasonality = decomposition.seasonal.reset_index()
    residuals = decomposition.resid.reset_index()

    if viz == 'combined':
        # Give every component its final column name before merging so the
        # joins never depend on automatically generated _x/_y suffixes.
        merged = series.reset_index()
        components = (
            (trend, 'Trend'),
            (seasonality, 'Seasonality'),
            (residuals, 'Residuals'),
        )
        for component, label in components:
            merged = merged.merge(
                component.rename(columns={obs_values: label}), on=time_field
            )
        merged['T+S'] = merged['Trend'] + merged['Seasonality']

        melted = pd.melt(merged, id_vars=time_field, var_name='Variable', value_name='Value',
                         value_vars=[obs_values, 'Trend', 'Seasonality', 'Residuals', 'T+S'])

        multiline_chart(melted, time_field, 'Value', groups='Variable', title='Observed vs. Components')
    else:
        ilinechart(trend, time_field, obs_values, title='Trend')
        ilinechart(seasonality, time_field, obs_values, title='Seasonality')
        ilinechart(residuals, time_field, obs_values, title='Residuals')

In [None]:
# Decompose the training series and overlay all components on one chart.
# NOTE(review): freq=365 is a hard-coded period -- confirm it matches the
# spacing of the rows in this data set.
ts_decomp(train, time_field='Date', obs_values='Observed', freq=365, viz='combined')

In [None]:
# Exponential smoothing with neither a trend nor a seasonal component,
# forecast 30 steps past the end of the training series.
model = sm.tsa.ExponentialSmoothing(endog=train['Observed']).fit()
single_exp = model.forecast(steps=30)

In [None]:
# Exponential smoothing with an additive trend component,
# forecast 30 steps past the end of the training series.
model = sm.tsa.ExponentialSmoothing(
    endog=train['Observed'],
    trend='additive',
).fit()
double_exp = model.forecast(steps=30)

In [None]:
# Exponential smoothing with additive trend and additive seasonality.
# NOTE(review): seasonal_periods=4 is a hard-coded cycle length -- confirm it
# reflects a real pattern in this series.
model = sm.tsa.ExponentialSmoothing(
    endog=train['Observed'],
    trend='additive',
    seasonal='additive',
    seasonal_periods=4,
).fit()

triple_exp = model.forecast(steps=30)

In [None]:
def forecast(data, field, model, periods):
    """Turn a model's predicted step changes into level forecasts.

    The first element of ``model.forecast(periods)`` is treated as a sequence
    of per-step changes. Starting from the last value of ``data[field]``,
    each change is added to the running level, and the running levels are
    returned.

    Args:
        data: DataFrame holding the observed series.
        field: Column of ``data`` that supplies the starting level.
        model: Fitted model whose ``forecast(periods)`` returns an indexable
            result with the predicted changes at position 0.
        periods: Number of steps to forecast.

    Returns:
        pandas.Series of forecast levels. Its index is the index of the last
        ``periods`` rows of ``data[field]`` with ``periods`` added to each
        label, so the labels must support ``+`` with an int.
    """
    steps = model.forecast(periods)[0]
    observed = data[field]

    # Walk forward from the last observed level, one predicted change at a time.
    level = observed.iloc[-1]
    levels = []
    for step in steps:
        level = level + step
        levels.append(level)

    # Shift the labels of the final `periods` rows forward by `periods`.
    shifted_labels = [label + periods for label in observed.tail(periods).index]

    return pd.Series(levels, index=shifted_labels)

In [None]:
# Add the lag-1 series and the first difference as new columns.
# `assign` returns a new frame, so nothing is written into a frame derived
# from the split (no SettingWithCopyWarning) and re-running the cell is safe.
# The leading NaN created by the shift is back-filled with the first
# observation, which makes the first difference 0.
train = train.assign(Shift=train['Observed'].shift(1).bfill())
train = train.assign(Diff=train['Observed'] - train['Shift'])

In [None]:
# Fit ARMA(2, 1) on the differenced series; forecast() then accumulates the
# predicted differences onto the last observed value.
model = ARMA(endog=train['Diff'], order=(2, 1)).fit()
arma_forecasts = forecast(data=train, field='Observed', model=model, periods=30)
arma_forecasts.head(n=20)

In [None]:
# Fit ARIMA(2, 1, 1) on the differenced series and accumulate its forecasts
# onto the last observed value.
# NOTE(review): 'Diff' is already a first difference, so d=1 here differences
# the data a second time -- confirm this is intended.
model = ARIMA(endog=train['Diff'], order=(2, 1, 1)).fit()
arima_forecasts = forecast(data=train, field='Observed', model=model, periods=30)
arima_forecasts.head(n=20)

In [None]:
# Fit SARIMAX directly on the observed series and forecast 30 steps.
# NOTE(review): the last entry of seasonal_order is 1, i.e. a seasonal period
# of one observation -- confirm this is intended, as it does not describe a
# repeating cycle.
model = SARIMAX(
    endog=train['Observed'],
    order=(2, 1, 1),
    seasonal_order=(1, 1, 1, 1),
).fit()

sarima_forecasts = model.forecast(steps=30)
sarima_forecasts.head(n=20)

In [None]:
# Rename the two training columns to 'ds' and 'y' for the Prophet fit.
series = train[['Date', 'Observed']].rename(columns={'Date': 'ds', 'Observed': 'y'})

model = Prophet()
model.fit(series)

# Extend the training dates by 30 periods and predict over the whole range.
# NOTE(review): no frequency is passed to make_future_dataframe, so the 30
# extra rows use its default spacing -- confirm they line up with the dates
# of the 30 hold-out rows.
future = model.make_future_dataframe(periods=30)
results = model.predict(future)

# Keep only the predictions for the 30 appended rows.
prophet_forecasts = results['yhat'].iloc[-30:]
prophet_forecasts.head(n=20)

In [None]:
# Display the last five training rows again.
train.tail(n=5)

In [None]:
# Display the first five 'yhat' values of the Prophet output.
results['yhat'].head(5)

In [None]:
# The 30 calendar days that follow the last training date. Starting one day
# after that date yields the same dates as a right-closed 31-day range,
# without relying on the `closed=` argument of date_range.
future_dates = pd.DataFrame({
    'Date': pd.date_range(start=train['Date'].iloc[-1] + pd.Timedelta(days=1),
                          periods=30, freq='D')
})

# Training dates followed by the future dates, with observed values attached
# where they exist (future rows get NaN from the left join).
fcast_df = pd.concat([pd.DataFrame(train['Date']), future_dates], ignore_index=True)
fcast_df = fcast_df.merge(train[['Date', 'Observed']], on='Date', how='left')

# Column assignment aligns each forecast Series on index labels, so this
# relies on every forecast carrying the labels of the 30 appended rows.
fcast_df['Double_Exp_Smooth'] = double_exp
fcast_df['Triple_Exp_Smooth'] = triple_exp
fcast_df['ARMA'] = arma_forecasts
fcast_df['ARIMA'] = arima_forecasts
fcast_df['SARIMA'] = sarima_forecasts
fcast_df['Prophet'] = prophet_forecasts

# Long format: one row per (Date, series) so each series is drawn as a line.
melted = pd.melt(fcast_df, id_vars='Date', var_name='Variable', value_name='Value',
                 value_vars=list(fcast_df.columns)[1:])

multiline_chart(melted, 'Date', 'Value', groups='Variable',
                title='Forecast Comparison')

In [None]:
# Show fcast_df's column labels to confirm every forecast column is present.
fcast_df.columns

In [None]:
# Display the first five hold-out rows for comparison with the forecasts.
test.head(n=5)

In [None]:
# Display the 30 forecast rows at the end of the comparison frame.
fcast_df.tail(30)

In [None]:
models = ['Double_Exp_Smooth', 'Triple_Exp_Smooth', 'ARMA', 'ARIMA', 'SARIMA', 'Prophet']
predictions = fcast_df.iloc[-30:][models]

# Score every forecast column against the hold-out observations.
# The subtraction aligns on index labels, so `test` and the last 30 rows of
# `fcast_df` must share the same labels.
# The loop variable is named `model_name` so it does not overwrite the fitted
# model object stored in `model`.
for model_name in models:
    error = test['Observed'] - predictions[model_name]
    mae = error.abs().mean()
    rmse = np.sqrt(np.mean(error**2))
    print(f'Model: {model_name} - mae: {mae}| rmse: {rmse}')