<a href="https://colab.research.google.com/github/KonuTech/Time-Series-Analysis-Forecasting-and-Machine-Learning/blob/main/Walk_Forward_Validation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!wget -nc https://lazyprogrammer.me/course_files/airline_passengers.csv

--2021-06-20 18:37:01--  https://lazyprogrammer.me/course_files/airline_passengers.csv
Resolving lazyprogrammer.me (lazyprogrammer.me)... 172.67.213.166, 104.21.23.210, 2606:4700:3030::ac43:d5a6, ...
Connecting to lazyprogrammer.me (lazyprogrammer.me)|172.67.213.166|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2036 (2.0K) [text/csv]
Saving to: ‘airline_passengers.csv’


2021-06-20 18:37:01 (42.2 MB/s) - ‘airline_passengers.csv’ saved [2036/2036]



In [2]:
!pip install -U statsmodels

Collecting statsmodels
[?25l  Downloading https://files.pythonhosted.org/packages/da/69/8eef30a6237c54f3c0b524140e2975f4b1eea3489b45eb3339574fc8acee/statsmodels-0.12.2-cp37-cp37m-manylinux1_x86_64.whl (9.5MB)
[K     |████████████████████████████████| 9.5MB 6.8MB/s 
Installing collected packages: statsmodels
  Found existing installation: statsmodels 0.10.2
    Uninstalling statsmodels-0.10.2:
      Successfully uninstalled statsmodels-0.10.2
Successfully installed statsmodels-0.12.2


In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import itertools

from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [5]:
df = pd.read_csv('airline_passengers.csv', index_col='Month', parse_dates=True)

In [6]:
df.index.freq = 'MS'

In [7]:
df.shape

(144, 1)

In [9]:
# Asssume the forecast horizon we care about is 12
# Validate over 10 steps
h = 12
steps = 10
Ntest = len(df) - h - steps + 1

In [25]:
# Configuration hyperparametrs to try
trend_type_list = ['add', 'mul']
seasonal_type_list = ['add', 'mul']
damped_trend_list = [True, False]
init_method_list = ['estimated', 'heuristic', 'legacy-heuristic']
use_boxcox_list = [True, False, 0]

In [26]:
# Note: statsmodels documentation states that 'log' is an acceptable input for use_boxcox. This is false.

In [27]:
def walkforward(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
    debug=False
    ):

    # store errors
    errors = []
    seen_last = False
    steps_completed = 0

    for end_of_train in range(Ntest, len(df) - h + 1):
      # We don't have to manually "add" the data to our dataset
      # Just index it at the right points - this is a "view" not a "copy"
      # So it doesn't take up any extra space or computation
      train = df.iloc[:end_of_train]
      test = df.iloc[end_of_train:end_of_train + h]

      if test.index[-1] == df.index[-1]:
        seen_last = True
      
      steps_completed += 1

      hw = ExponentialSmoothing(
          train['Passengers'],
          initialization_method=init_method,
          trend=trend_type,
          damped_trend=damped_trend,
          seasonal=seasonal_type,
          seasonal_periods=12,
          use_boxcox=use_boxcox)
      res_hw = hw.fit()

      # compute error for the forecast horizon
      fcast = res_hw.forecast(h)
      error = mean_squared_error(test['Passengers'], fcast)
      errors.append(error)

      if debug:
        print("seen_last:", seen_last)
        print("steps_completed:", steps_completed)

      return np.mean(errors)

In [28]:
# test our function
walkforward('add', 'add', False, 'legacy-heuristic', 0, debug=True)

seen_last: False
steps_completed: 1


4480.04257518788

In [29]:
# Iterate through all possible options (i.e. grif search)
tuple_of_option_lists = (
    trend_type_list,
    seasonal_type_list,
    damped_trens_list,
    init_method_list,
    use_boxcox_list
)

for x in itertools.product(*tuple_of_option_lists):
  print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [30]:
best_score = float('inf')
best_options = None
for x in itertools.product(*tuple_of_option_lists):
  score = walkforward(*x)

  if score < best_score:
    print("Best score so far:", score)
    best_score = score
    best_options = x

Best score so far: 380.65232061527036
Best score so far: 241.5619978704101
Best score so far: 241.24315400926346


  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 241.02106331277878


  return err.T @ err


Best score so far: 212.89909403206025


  return err.T @ err
  return err.T @ err
  return err.T @ err


Best score so far: 209.89845401368112


  return err.T @ err
  return err.T @ err


In [31]:
print("best score:", best_score)

trend_type, seasonal_type, damped_trend, init_method, use_boxcox = best_options
print("trend_type", trend_type)
print("seasonal_type", seasonal_type)
print("damped_trend", damped_trend)
print("init_method", init_method)
print("use_boxcox", use_boxcox)

best score: 209.89845401368112
trend_type mul
seasonal_type mul
damped_trend False
init_method estimated
use_boxcox False
