#Libraries and data

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive so the project
# files stored there can be reached from this notebook.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change the working directory to the project folder on the mounted Drive;
# the relative file paths used by later cells are resolved from here.
%cd /content/drive/MyDrive/Time Series Forecasting Product

/content/drive/MyDrive/Time Series Forecasting Product


In [None]:
#Install Greykite
!pip install greykite

In [None]:
#libraries
import numpy as np
import pandas as pd
from greykite.framework.templates.autogen.forecast_config import *
from greykite.framework.templates.forecaster import Forecaster
from greykite.framework.templates.model_templates import ModelTemplateEnum
from greykite.common.features.timeseries_features import *
from greykite.common.evaluation import EvaluationMetricEnum
from greykite.framework.utils.result_summary import summarize_grid_search_results
from plotly.offline import iplot

In [None]:
# Load the historical observations and the future rows (regressors only,
# Demand is empty there).
# NOTE(review): the displayed Date values look like M/D/YYYY (e.g. 1/1/2021),
# not YYYY-MM-DD - confirm the format if dates are ever parsed explicitly.
df = pd.read_csv('nyc_data.csv')
future_df = pd.read_csv('future.csv')
future_df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2021,,0,0,0,5.0,154.221
1,1/2/2021,,0,0,0,11.11,264.805
2,1/3/2021,,0,0,0,3.89,115.499
3,1/4/2021,,0,0,0,6.67,124.65
4,1/5/2021,,0,0,0,5.56,77.968


In [None]:
# Read the tuned Silverkite parameters; the first CSV column holds the
# parameter names and becomes the index.
parameters = pd.read_csv(
    "Forecasting Product/best_params_silverkite.csv",
    index_col=0,
)
parameters

Unnamed: 0,"[('estimator__growth_term', 'quadratic'), ('estimator__fit_algorithm_dict', {'fit_algorithm': 'linear'})]"
mean_test_RMSE,47.8
param_estimator__fit_algorithm_dict,{'fit_algorithm': 'linear'}
param_estimator__growth_term,quadratic


In [None]:
# Pull the tuned values out of the parameters table.
# Each .loc[...] lookup returns a one-element row whose index is the (string)
# column label, so the value is taken by position with .iloc[0]. Plain [0] on
# a string-labelled Series relies on a deprecated positional fallback.
growth_term_param = parameters.loc["param_estimator__growth_term"].iloc[0]
fit_algorithm_param = parameters.loc["param_estimator__fit_algorithm_dict"].iloc[0]

In [None]:
# Stack the future rows underneath the history and renumber the rows 0..n-1
# in a single step.
df = pd.concat([df, future_df], ignore_index = True)
df.head()

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
0,1/1/2015,720.000885,0,0,0,3.68,41.305
1,1/2/2015,581.276773,0,0,0,4.73,131.574
2,1/3/2015,754.117039,0,0,0,7.23,162.7
3,1/4/2015,622.252774,0,0,0,10.96,160.281
4,1/5/2015,785.373319,0,0,0,6.92,51.077


In [None]:
# Look at the last five rows of the combined frame (the appended future rows
# have no Demand value).
df.tail(5)

Unnamed: 0,Date,Demand,Easter,Thanksgiving,Christmas,Temperature,Marketing
2218,1/27/2021,,0,0,0,3.33,39.664
2219,1/28/2021,,0,0,0,1.67,195.314
2220,1/29/2021,,0,0,0,-2.78,235.894
2221,1/30/2021,,0,0,0,1.11,152.752
2222,1/31/2021,,0,0,0,4.44,158.62


In [None]:
# Rename the target column to "y", then show just the header row to confirm
# the new column names.
df = df.rename({'Demand': 'y'}, axis = "columns")
df.iloc[:0]

Unnamed: 0,Date,y,Easter,Thanksgiving,Christmas,Temperature,Marketing


#Silverkite Preparations

In [None]:
# Describe the series: which columns hold time and value, the daily
# frequency, and the last date that belongs to the training data.
metadata = MetadataParam(
    time_col = "Date",
    value_col = "y",
    freq = "D",
    train_end_date = pd.Timestamp("2020-12-31"),
)
metadata

MetadataParam(anomaly_info=None, date_format=None, freq='D', time_col='Date', train_end_date=Timestamp('2020-12-31 00:00:00'), value_col='y')

In [None]:
# Growth component: use the growth term read from the tuned parameters.
growth = {"growth_term": growth_term_param}
growth

{'growth_term': 'quadratic'}

In [None]:
# Seasonality component: every seasonality is set to "auto".
seasonality = dict.fromkeys(
    [
        "yearly_seasonality",
        "quarterly_seasonality",
        "monthly_seasonality",
        "weekly_seasonality",
        "daily_seasonality",
    ],
    "auto",
)
seasonality

{'daily_seasonality': 'auto',
 'monthly_seasonality': 'auto',
 'quarterly_seasonality': 'auto',
 'weekly_seasonality': 'auto',
 'yearly_seasonality': 'auto'}

In [None]:
# Events component: US holidays with a 2-day window on each side, New Year's
# Day modelled on its own with a (3, 1) pre/post window, plus a custom
# "elections" event on two dates.
events = {
    "holidays_to_model_separately": ["New Year's Day"],
    "holiday_lookup_countries": ["US"],
    "holiday_pre_num_days": 2,
    "holiday_post_num_days": 2,
    "holiday_pre_post_num_dict": {"New Year's Day": (3, 1)},
    "daily_event_df_dict": {
        "elections": pd.DataFrame({
            "date": ["2016-11-08", "2020-11-03"],
            "event_name": ["elections", "elections"],
        }),
    },
}
events

{'daily_event_df_dict': {'elections':          date event_name
  0  2016-11-08  elections
  1  2020-11-03  elections},
 'holiday_lookup_countries': ['US'],
 'holiday_post_num_days': 2,
 'holiday_pre_num_days': 2,
 'holiday_pre_post_num_dict': {"New Year's Day": (3, 1)},
 'holidays_to_model_separately': ["New Year's Day"]}

In [None]:
# Changepoints capture changes in the trend; detection method is "auto".
changepoints = {"changepoints_dict": {"method": "auto"}}

In [None]:
# Regressors component: columns of df used as external regressors.
regressors = {"regressor_cols": ["Easter", "Temperature", "Marketing"]}
regressors

{'regressor_cols': ['Easter', 'Temperature', 'Marketing']}

In [None]:
# Lagged regressors component: each regressor gets an "auto" lag setting.
lagged_regressors = {
    "lagged_regressor_dict": {
        column: "auto" for column in ("Temperature", "Easter", "Marketing")
    }
}

In [None]:
# Autoregression -> dependent on the forecasting horizon
autoregression = {"autoreg_dict": "auto"}

In [None]:
# Inspect the raw value read from the CSV: it is the text form of a dict,
# not a dict, so it has to be parsed before use.
fit_algorithm_param

"{'fit_algorithm': 'linear'}"

In [None]:
#Fitting algorithms
# fit_algorithm_param is the text form of a dict (e.g. "{'fit_algorithm': 'linear'}"),
# so it is parsed back into a dict here.
# yaml.safe_load is used because yaml.load without an explicit Loader raises
# a TypeError on PyYAML >= 6 and can construct arbitrary objects on older
# versions.
import yaml
custom = dict(fit_algorithm_dict = yaml.safe_load(fit_algorithm_param))
custom

{'fit_algorithm_dict': {'fit_algorithm': 'linear'}}

#Silverkite Model

In [None]:
# Bundle every component dictionary defined above into one model
# specification.
model_components = ModelComponentsParam(
    growth = growth,
    seasonality = seasonality,
    events = events,
    changepoints = changepoints,
    regressors = regressors,
    lagged_regressors = lagged_regressors,
    autoregression = autoregression,
    custom = custom,
)

In [None]:
# Cross-validation setup: expanding window, at most 50 splits spaced 16
# periods apart. The minimum training size is the row count of df minus
# 180 and minus 31.
evaluation_period = EvaluationPeriodParam(
    cv_min_train_periods = len(df) - 180 - 31,
    cv_expanding_window = True,
    cv_max_splits = 50,
    cv_periods_between_splits = 16,
)

In [None]:
# Select the cross-validation winner by root mean squared error.
rmse_name = EvaluationMetricEnum.RootMeanSquaredError.name
evaluation_metric = EvaluationMetricParam(cv_selection_metric = rmse_name)

In [None]:
# Full forecast configuration: Silverkite template, 31-period horizon, and
# the metadata / components / evaluation settings built above.
config = ForecastConfig(
    model_template = ModelTemplateEnum.SILVERKITE.name,
    forecast_horizon = 31,
    metadata_param = metadata,
    model_components_param = model_components,
    evaluation_period_param = evaluation_period,
    evaluation_metric_param = evaluation_metric,
)

In [None]:
# Fit the model and produce the forecast according to the configuration.
forecaster = Forecaster()
result = forecaster.run_forecast_config(df = df, config = config)

In [None]:
#Look at the model summary
summary = result.model[-1].summary() # -1 retrieves the estimator (last step) from the pipeline
print(summary)


Number of observations: 2192,   Number of features: 182
Method: Ordinary least squares
Number of nonzero features: 182

Residuals:
         Min           1Q       Median           3Q          Max
      -164.4       -32.68      -0.8196        30.66        234.1

            Pred_col Estimate Std. Err   t value Pr(>|t|) sig. code               95%CI
           Intercept    559.5    5.726     97.72   <2e-16       ***      (548.3, 570.7)
events_New Years Day   -15.72    22.47   -0.6996    0.484               (-59.78, 28.34)
 events_N...rs Day-1   -45.29    22.61    -2.003    0.045         *   (-89.63, -0.9385)
 events_N...rs Day-2   -7.534    22.65   -0.3326    0.739               (-51.96, 36.89)
 events_N...rs Day-3   -40.91     22.6     -1.81    0.070         .     (-85.22, 3.405)
 events_N...rs Day+1   -3.559    22.39    -0.159    0.874               (-47.47, 40.35)
        events_Other    -3.06    7.101   -0.4309    0.667               (-16.99, 10.87)
      events_Other-1   -4.952    

In [None]:
# Visualization: build the components figure of the forecast and render it
# inline with plotly.
fig = result.forecast.plot_components()
iplot(fig)

In [None]:
# Keep only the date and forecast columns, label the forecast column with
# the model name, and take the last 31 rows as the predictions.
forecast = (
    result.forecast.df[["Date", "forecast"]]
    .rename(columns = {'forecast': 'silverkite'})
)
predictions_silverkite = forecast.tail(31)
predictions_silverkite

In [None]:
# Export the Silverkite predictions to CSV in the Ensemble folder
# (path is relative to the current working directory).
predictions_silverkite.to_csv("Forecasting Product/Ensemble/predictions_silverkite.csv")