# Model Selection with WBIC

In [14]:
from datetime import timedelta

import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
%matplotlib inline

import orbit
print(orbit.__version__)
from orbit.models import DLT,ETS, KTRLite, LGT
from orbit.utils.simulation import make_trend, make_regression

1.1.0dev


In [2]:
# Show the installed orbit version as the cell's output.
orbit.__version__

'1.1.0dev'

In [3]:
# Load IPython's autoreload extension and enable mode 2 so that edited
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

We simulate a regression problem with a trend and `8` regressors, of which only `3` are effective. First, we generate the `3` effective regressors.

In [27]:
# --- simulation settings ---
NUM_OF_REGRESSORS = 8
NUM_OF_EFFECTIVE_REGRESSORS = 3
SERIES_LEN = 100
SEED = 20210101

# True coefficients of the effective regressors, drawn uniformly from [-1, 1)
# with a seeded generator.
COEFS = np.random.default_rng(SEED).uniform(
    -1, 1, NUM_OF_EFFECTIVE_REGRESSORS
)

# Simulate the trend and the regression component with orbit's helpers.
trend = make_trend(SERIES_LEN, rw_loc=0.01, rw_scale=0.1)
x, regression, coefs = make_regression(series_len=SERIES_LEN, coefs=COEFS)
print(regression.shape, x.shape)

# The response is trend plus regression, shifted so that its minimum is zero.
y = trend + regression
y -= y.min()

(100,) (100, 3)


Next, we add `5` irrelevant regressors to the dataset to make selecting the best model more challenging.

In [28]:
# Irrelevant regressors: standard-normal noise columns that never enter `y`.
# A seeded generator (offset from SEED so the stream differs from the one used
# for COEFS) keeps these columns identical across reruns.
x_extra = np.random.default_rng(SEED + 1).normal(
    0, 1, (SERIES_LEN, NUM_OF_REGRESSORS - NUM_OF_EFFECTIVE_REGRESSORS)
)
# Keep only the effective columns of `x` before appending, so re-running this
# cell does not keep growing the regressor matrix.
x = np.concatenate([x[:, :NUM_OF_EFFECTIVE_REGRESSORS], x_extra], axis=-1)
print(x.shape)

(100, 8)


In [29]:
# Column names x1..xN for the regressors; the loop variable is named `i` so it
# cannot be confused with the regressor matrix `x`.
x_cols = [f"x{i}" for i in range(1, NUM_OF_REGRESSORS + 1)]
response_col = "y"
dt_col = "date"

# First column is the response, the remaining columns are the regressors.
obs_matrix = np.concatenate([y.reshape(-1, 1), x], axis=1)
# make a data frame for orbit inputs
df = pd.DataFrame(obs_matrix, columns=[response_col] + x_cols)

# Dummy weekly date stamps, stored under `dt_col` so the column name always
# matches the `date_col` handed to the models.
dt = pd.date_range(start='2016-01-04', periods=SERIES_LEN, freq="1W")
df[dt_col] = dt
print(df.shape)
print(df.head())

(100, 10)
          y        x1        x2        x3        x4        x5        x6  \
0  3.010526  0.172792  0.000000  0.165219 -1.768846  0.075552 -1.130630   
1  3.448303 -0.000000  0.452678  0.223187 -1.274101 -0.061154  0.064514   
2  3.012242 -0.000000  0.290559  0.182286 -0.801334  1.312035  1.274699   
3  1.422250  0.147066  0.014211  0.273356 -1.444821 -0.368961 -0.769227   
4  3.275717 -0.368227 -0.081455 -0.241060  2.089979  0.041971 -0.048341   

         x7        x8       date  
0 -0.651430 -0.893116 2016-01-10  
1  0.410113 -0.572882 2016-01-17  
2 -1.214358  0.313719 2016-01-24  
3  0.392616  0.057294 2016-01-31  
4 -0.513154 -0.084589 2016-02-07  


In [30]:
 # Use the effective regressors plus the first irrelevant one (x1..x4).
 regressor_col = x_cols[:NUM_OF_EFFECTIVE_REGRESSORS + 1]


In [22]:
# DLT model on the regressors selected above. The level and slope smoothing
# parameters are pinned to small values so that the regression coefficients
# are learned more effectively.
dlt_mod = DLT(
    response_col=response_col,
    date_col=dt_col,
    regressor_col=regressor_col,
    seed=2020,
    level_sm_input=0.01,
    slope_sm_input=0.01,
    num_warmup=4000,
    num_sample=4000,
)

# The value returned by fit_WBIC is displayed as the cell output.
dlt_mod.fit_WBIC(df=df)


Gradient evaluation took 0.000347 seconds
1000 transitions using 10 leapfrog steps per transition would take 3.47 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.000309 seconds
1000 transitions using 10 leapfrog steps per transition would take 3.09 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)

Gradient evaluation took 0.000239 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.39 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.000272 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.72 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [



-165.8260620655938

In [25]:
# ETS model without regressors; the level smoothing parameter is pinned to a
# small fixed value, mirroring the DLT configuration.
ets_mod = ETS(
    response_col=response_col,
    date_col=dt_col,
    seed=2020,
    level_sm_input=0.01,
    num_warmup=4000,
    num_sample=4000,
)

# The value returned by fit_WBIC is displayed as the cell output.
ets_mod.fit_WBIC(df)


Gradient evaluation took 9.5e-05 seconds

Gradient evaluation took 0.000108 seconds
1000 transitions using 10 leapfrog steps per transition would take 1.08 seconds.
Adjust your expectations accordingly!


1000 transitions using 10 leapfrog steps per transition would take 0.95 seconds.
Adjust your expectations accordingly!



Iteration:    1 / 2000 [  0%]  (Warmup)
Gradient evaluation took 9.6e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.96 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)

Gradient evaluation took 9.6e-05 seconds
1000 transitions using 10 leapfrog steps per transition would take 0.96 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [ 20



-142.83880824930614

In [24]:
# KTRLite fitted with the MAP estimator. The smoothing and sampling arguments
# used for the other models (level_sm_input, num_warmup, num_sample) are left
# out here.
KTRLite_mod = KTRLite(
    response_col=response_col,
    date_col=dt_col,
    seed=2020,
    estimator='stan-map',
)

# Plain fit; the WBIC entry point is not used in this cell.
KTRLite_mod.fit(df)

Initial log joint probability = -669.995


<orbit.forecaster.map.MAPForecaster at 0x1464b86d0>

    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      19      -138.858     0.0607337       37.8001      0.1174       0.801       26   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      39      -137.425   0.000767978       30.2634   3.311e-05       0.001      121  LS failed, Hessian reset 
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      59      -137.407   0.000241534       33.9523       4.224      0.4224      149   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      79      -137.405   7.02049e-07       31.6083       0.264      0.8429      180   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99      -137.405   3.69224e-08       34.0696      0.9116      0.9116      210   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # eva

In [32]:
# LGT model sampled with MCMC; no smoothing parameter is pinned here
# (level_sm_input is left out).
LGT_mod = LGT(
    response_col=response_col,
    date_col=dt_col,
    seed=2020,
    estimator='stan-mcmc',
    num_warmup=4000,
    num_sample=4000,
)

# The value returned by fit_WBIC is displayed as the cell output.
LGT_mod.fit_WBIC(df)


Gradient evaluation took 0.000259 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.59 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.000276 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.76 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)

Gradient evaluation took 0.000228 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.28 seconds.
Adjust your expectations accordingly!



Gradient evaluation took 0.000244 seconds
1000 transitions using 10 leapfrog steps per transition would take 2.44 seconds.
Adjust your expectations accordingly!


Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:    1 / 2000 [  0%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  200 / 2000 [ 10%]  (Warmup)
Iteration:  400 / 2000 [



Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.599946 seconds (Warm-up)
               0.53226 seconds (Sampling)
               1.13221 seconds (Total)

Iteration: 2000 / 2000 [100%]  (Sampling)

 Elapsed Time: 0.606032 seconds (Warm-up)
               0.596136 seconds (Sampling)
               1.20217 seconds (Total)



-144.82307440011647

In [None]:
# Fit the ETS model through the WBIC entry point; the call's return value is
# shown as the cell output.
ets_mod.fit_WBIC(df=df)

In [None]:
# Rebuild the ETS model with the level smoothing parameter pinned to a small
# fixed value.
ets_mod = ETS(
    response_col=response_col,
    date_col=dt_col,
    seed=2020,
    level_sm_input=0.01,
    num_warmup=4000,
    num_sample=4000,
)

# Regular fit with an explicit sampling temperature of log(100); 100 equals
# SERIES_LEN, the number of observations in df.
ets_mod.fit(df=df, sampling_temperature=np.log(100.0))

In [None]:
# Read the WBIC value off the ETS model (presumably based on the fit in the
# previous cell -- confirm against the orbit API).
ets_mod.get_WBIC_value()

In [None]:
# Error message recorded during development (kept as a comment so that this
# cell does not raise a SyntaxError when the notebook is run top to bottom):
#
# ForecasterException: Model class: <class 'orbit.template.dlt.DLTModel'> is incompatible with
#         Estimator: <class 'orbit.estimators.stan_estimator.StanEstimatorMCMC'>.
#             Estimator Support: [<class 'orbit.estimators.stan_estimator.StanEstimatorMAP'>,
#                                 <class 'orbit.estimators.stan_estimator.StanEstimatorMCMC'>
#                                 <class 'orbit.estimators.stan_estimator.StanEstimatorMCMC'>]

Now, we can calculate WBIC and compare them across models.

In [None]:
%%time
wbics = np.empty(NUM_OF_REGRESSORS)

for idx in range(NUM_OF_REGRESSORS):
    regressor_col = x_cols[:idx + 1]

    dlt_mod = DLT(
        response_col=response_col,
        date_col=dt_col,
        regressor_col=regressor_col,
        seed=2020,
        # fixing the smoothing parameters to learn regression coefficients more effectively
        level_sm_input=0.01,
        slope_sm_input=0.01,
        num_warmup=4000,
        num_sample=4000,
    )
    dlt_mod.fit(df=df)
    wbic = dlt.get_training_metrics()['WBIC']
    print('Regressors:{} WBIC:{:.5f}'.format(regressor_col, wbic))
    wbics[idx] = wbic

We plot the WBIC values against the number of regressors included. As we can see, WBIC is lowest when the included regressors coincide exactly with the true ones.

In [None]:
# WBIC as a function of how many regressors were included in the model.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))
ax.plot(np.arange(1, NUM_OF_REGRESSORS + 1), wbics, color='dodgerblue', label='WBICs')
# Mark the true number of effective regressors, taken from the simulation
# constant instead of a hard-coded 3 so the line follows the configuration.
ax.axvline(x=NUM_OF_EFFECTIVE_REGRESSORS, linestyle='--', color='orange', label='truth')
ax.set_xlabel('Number of Regressors')
ax.set_ylabel('WBIC')
ax.set_title('Model Selection with WBIC')
# Trailing semicolon keeps the Legend repr out of the cell output.
fig.legend();