# Backtest Misc

Examples of running a backtest over:
* multiple series with a single model
* multiple models, each over multiple series

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')

from orbit.lgt import LGT
from orbit.dlt import DLT
from orbit.backtest.backtest import TimeSeriesSplitter, Backtest
from orbit.utils.metrics import mape, smape, wmape
from orbit.backtest.multibacktest import run_multi_series_backtest

## Load data

In [3]:
# Relative location of the example CSV; the `date` column is parsed into datetimes on load.
data_path = "../examples/data/m4_weekly_example.csv"
raw_data = pd.read_csv(
    filepath_or_buffer=data_path,
    parse_dates=["date"],
)

In [4]:
# Work on a copy so the frame read from disk (`raw_data`) is left as loaded.
data = raw_data.copy(deep=True)
print(data.shape)  # (number of rows, number of columns)
data.head(n=5)

(5435, 4)


Unnamed: 0,key,week_num,value,date
0,W1,2,1089.2,1970-01-05
1,W1,3,1078.91,1970-01-12
2,W1,4,1079.88,1970-01-19
3,W1,5,1063.58,1970-01-26
4,W1,6,1060.61,1970-02-02


## Settings

In [5]:
# Column names of the input frame plus the seasonal period handed to the models.
# NOTE(review): 52 presumably stands for a yearly cycle on weekly data — confirm.
data_settings = dict(
    date_col="date",
    response_col="value",
    key_col="key",
    seasonality=52,
)

# Knobs for the backtest runs; they are unpacked into constants in the next cell.
bt_settings = dict(
    min_train_len=120,
    forecast_len=13,
    incremental_len=5,
    n_splits=1,
    scheme="expanding",
    seed=2019,
    metric_horizon=[1, 4, 13],
)

In [6]:
# data settings
DATE_COL = data_settings['date_col']
RESPONSE_COL = data_settings['response_col']
KEY_COL = data_settings['key_col']
SEASONALITY = data_settings['seasonality']

# backtest settings
MIN_TRAIN_LEN = bt_settings['min_train_len']
FORECAST_LEN = bt_settings['forecast_len']
INCREMENTAL_LEN = bt_settings['incremental_len']
N_SPLITS = bt_settings['n_splits']
WINDOW_TYPE = bt_settings['scheme']
SEED = bt_settings['seed']
METRIC_HORIZON = bt_settings['metric_horizon']

In [14]:
# Keyword arguments shared by both models: same columns, seasonality and seed,
# with MAP used for both inference and prediction.
map_model_kwargs = dict(
    response_col=RESPONSE_COL,
    date_col=DATE_COL,
    seasonality=SEASONALITY,
    seed=SEED,
    infer_method='map',
    predict_method='map',
    is_multiplicative=True,
)

lgt_map = LGT(**map_model_kwargs)

# DLT takes one extra argument on top of the shared ones: the global trend option.
dlt_map = DLT(global_trend_option='loglinear', **map_model_kwargs)


In [15]:
# Display label -> model instance; the labels end up in the `model` column of the results.
bt_models = dict([
    ('LGT-MAP', lgt_map),
    ('DLT-MAP', dlt_map),
])

## Backtest Multiple Series

In [9]:
# Backtest every series in `data` with the LGT-MAP model only.
bt_result, bt_scores = run_multi_series_backtest(
    # what to fit and on which columns
    model=lgt_map,
    data=data,
    key_col=KEY_COL,
    date_col=DATE_COL,
    response_col=RESPONSE_COL,
    predicted_col='prediction',
    # how the train/forecast windows are laid out
    window_type=WINDOW_TYPE,
    n_splits=N_SPLITS,
    min_train_len=MIN_TRAIN_LEN,
    incremental_len=INCREMENTAL_LEN,
    forecast_len=FORECAST_LEN,
)


100%|██████████| 3/3 [00:06<00:00,  2.07s/it]


In [10]:
# Preview the per-series scores of the single-model run.
bt_scores.head(n=5)

Unnamed: 0,wmape,smape,n_splits,key
0,0.021875,0.021738,1,W1
1,0.061956,0.063177,1,W10
2,0.009357,0.009349,1,W100


## Backtest Multiple Models (and Multiple Series)

In [11]:
# Run the same multi-series backtest once per entry of `bt_models` and collect
# the outputs. The per-model frames use their own names (`model_result`,
# `model_scores`) so the loop does not overwrite the `bt_result` / `bt_scores`
# produced by the single-model cell above; those names are only rebound when
# the collected frames are concatenated.
bt_result_list = []
bt_scores_list = []
for mod_name, mod in bt_models.items():
    model_result, model_scores = run_multi_series_backtest(
        data=data,
        response_col=RESPONSE_COL,
        key_col=KEY_COL,
        date_col=DATE_COL,
        model=mod,
        min_train_len=MIN_TRAIN_LEN,
        incremental_len=INCREMENTAL_LEN,
        forecast_len=FORECAST_LEN,
        predicted_col='prediction',
        n_splits=N_SPLITS,
        window_type=WINDOW_TYPE,
    )
    # Tag every row with the model label so rows remain distinguishable
    # once the frames of all models are stacked together.
    model_result['model'] = mod_name
    model_scores['model'] = mod_name
    bt_result_list.append(model_result)
    bt_scores_list.append(model_scores)

100%|██████████| 3/3 [00:06<00:00,  2.10s/it]
100%|██████████| 3/3 [00:03<00:00,  1.27s/it]


In [12]:
# Stack the per-model frames row-wise. Each frame keeps its own row labels,
# so index values repeat across models in the combined frames.
bt_result = pd.concat(bt_result_list, axis="index")
bt_scores = pd.concat(bt_scores_list, axis="index")

In [13]:
# Preview the combined scores; the `model` column tells the runs apart.
bt_scores.head(n=5)

Unnamed: 0,wmape,smape,n_splits,key,model
0,0.021875,0.021738,1,W1,LGT-MAP
1,0.061956,0.063177,1,W10,LGT-MAP
2,0.009357,0.009349,1,W100,LGT-MAP
0,0.014449,0.014422,1,W1,DLT-MAP
1,0.074142,0.076276,1,W10,DLT-MAP
