In [3]:
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt

import itertools
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.holtwinters import ExponentialSmoothing

In [4]:
df = yf.Ticker('BTC-USD').history(start="2021-05-12",  end="2021-11-12")
df = pd.DataFrame(df['Close'])
df

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2021-05-11,56704.574219
2021-05-12,49150.535156
2021-05-13,49716.191406
2021-05-14,49880.535156
2021-05-15,46760.187500
...,...
2021-11-07,63326.988281
2021-11-08,67566.828125
2021-11-09,66971.828125
2021-11-10,64995.230469


In [5]:
df.index.freq="D"

In [6]:
df.shape

(185, 1)

In [7]:
# Assume the forecast horizon we care about is 12
# Validate over 10 steps
h = 12
steps = 10
Ntest = len(df) - h -steps +1

In [8]:
# configuration hyperparameters to try 
trend_type_list = ['add','mul']
seasonal_type_list = ['add','mul']
damped_trend_list = [True, False]
init_method_list = ['estimated','heuristic','legacy-heuristic']
use_boxcox_list = [True,False,0]

In [9]:
# NOTE: statsmodel documentation stats taht 'log' is an acceptable inpit for use_boxcox. This is False.

In [22]:
def walkforward(
    trend_type,
    seasonal_type,
    damped_trend,
    init_method,
    use_boxcox,
    debug = False):
    
    #store errors
    errors = []
    seen_last = False
    steps_completed = 0
    
    for end_of_train in range(Ntest, len(df) -h +1):
        # We don't have to manually 'add' the data to our dataset
        # Just index it at the right points - this is a 'view' not a 'copy'
        # So it doesn't take up any extra space or computation
        train = df.iloc[:end_of_train]
        test = df.iloc[end_of_train: end_of_train + h]
        
        if test.index[-1] == df.index[-1]:
            seen_last = True
        
        steps_completed += 1
        
        hw = ExponentialSmoothing(
            train['Close'],
            initialization_method = init_method,
            trend = trend_type,
            damped_trend = damped_trend,
            seasonal = seasonal_type,
            seasonal_periods= 12,
            use_boxcox = use_boxcox)
        res_hw = hw.fit()
        
        #compute error for the forecase horizon
        fcast = res_hw.forecast(h)
        error = mean_squared_error(test['Close'], fcast)
        errors.append(error)
        
        
    if debug: 
        print("seen_last:", seen_last)
        print("steps completed:", steps_completed)
    
    return np.mean(errors)

In [23]:
# test our function
walkforward('add','add', False, 'legacy-heuristic', 0, debug=True)

seen_last: True
steps completed: 10


6073246.114855747

In [24]:
# Iterate through all possible options (i.e. grid search)
tuple_of_option_lists = (
    trend_type_list,
    seasonal_type_list,
    damped_trend_list,
    init_method_list,
    use_boxcox_list)

for x in itertools.product(*tuple_of_option_lists):
    print(x)

('add', 'add', True, 'estimated', True)
('add', 'add', True, 'estimated', False)
('add', 'add', True, 'estimated', 0)
('add', 'add', True, 'heuristic', True)
('add', 'add', True, 'heuristic', False)
('add', 'add', True, 'heuristic', 0)
('add', 'add', True, 'legacy-heuristic', True)
('add', 'add', True, 'legacy-heuristic', False)
('add', 'add', True, 'legacy-heuristic', 0)
('add', 'add', False, 'estimated', True)
('add', 'add', False, 'estimated', False)
('add', 'add', False, 'estimated', 0)
('add', 'add', False, 'heuristic', True)
('add', 'add', False, 'heuristic', False)
('add', 'add', False, 'heuristic', 0)
('add', 'add', False, 'legacy-heuristic', True)
('add', 'add', False, 'legacy-heuristic', False)
('add', 'add', False, 'legacy-heuristic', 0)
('add', 'mul', True, 'estimated', True)
('add', 'mul', True, 'estimated', False)
('add', 'mul', True, 'estimated', 0)
('add', 'mul', True, 'heuristic', True)
('add', 'mul', True, 'heuristic', False)
('add', 'mul', True, 'heuristic', 0)
('add

In [25]:
best_score = float('inf')
best_options = None
for x in itertools.product(*tuple_of_option_lists):
    score = walkforward(*x)
    
    if score < best_score:
        print("Best score so far:", score)
        best_score = score
        best_options = x

Best score so far: 13834562.24885514




Best score so far: 7636188.390512535
Best score so far: 5477418.444183205




Best score so far: 5473625.653527598




Best score so far: 5389891.524826242






ValueError: Input contains NaN, infinity or a value too large for dtype('float64').

In [26]:
print("best score:", best_score)

trend_type, seasonal_type, damped_trend, init_method, use_boxcox = best_options
print("trend_type", trend_type)
print("seasonal_type", seasonal_type)
print("damped_trend", damped_trend)
print("init_method", init_method)
print("use_boxcox", use_boxcox)

best score: 5389891.524826242
trend_type add
seasonal_type mul
damped_trend True
init_method estimated
use_boxcox 0
