Skip to content

Commit

Permalink
Allow percentiles in backtest (bt) output (#724)
Browse files: browse the repository at this point in the history
  • Loading branch information
wangzhishi committed Mar 9, 2022
1 parent cde4222 commit 2e9baaf
Show file tree
Hide file tree
Showing 3 changed files with 241 additions and 168 deletions.
388 changes: 227 additions & 161 deletions examples/backtest.ipynb

Large diffs are not rendered by default.

9 changes: 5 additions & 4 deletions orbit/diagnostics/backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,7 @@ def fit_predict(self):
model_copy.fit(train_df)
train_predictions = model_copy.predict(train_df)
test_predictions = model_copy.predict(test_df)
all_pred_cols = [x for x in train_predictions.columns if x!= date_col]

# set attributes
self._fitted_models.append(model_copy)
Expand All @@ -297,13 +298,13 @@ def fit_predict(self):
train_dates = train_df[date_col].rename(BacktestFitKeys.DATE.value, axis='columns')
train_response = train_df[response_col].rename(BacktestFitKeys.ACTUAL.value, axis='columns')
train_values = pd.concat(
(train_dates, train_response, train_predictions[BacktestFitKeys.PREDICTED.value]), axis=1)
(train_dates, train_response, train_predictions[all_pred_cols]), axis=1)
train_values[BacktestFitKeys.TRAIN_FLAG.value] = True
# join test
test_dates = test_df[date_col].rename(BacktestFitKeys.DATE.value, axis='columns')
test_response = test_df[response_col].rename(BacktestFitKeys.ACTUAL.value, axis='columns')
test_values = pd.concat(
(test_dates, test_response, test_predictions[BacktestFitKeys.PREDICTED.value]), axis=1)
(test_dates, test_response, test_predictions[all_pred_cols]), axis=1)
test_values[BacktestFitKeys.TRAIN_FLAG.value] = False
# union train/test
both_values = pd.concat((train_values, test_values), axis=0)
Expand Down Expand Up @@ -341,9 +342,9 @@ def _validate_metric_callables(self, metrics):
if metric_signature == {BacktestFitKeys.ACTUAL.value, BacktestFitKeys.PREDICTED.value}:
continue
elif metric_signature.issubset({
BacktestFitKeys.TEST_ACTUAL.value,
BacktestFitKeys.TEST_ACTUAL.value,
BacktestFitKeys.TEST_PREDICTED.value,
BacktestFitKeys.TRAIN_ACTUAL.value,
BacktestFitKeys.TRAIN_ACTUAL.value,
BacktestFitKeys.TRAIN_PREDICTED.value
}):
continue
Expand Down
12 changes: 9 additions & 3 deletions tests/orbit/diagnostics/test_backtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np

from orbit.diagnostics.backtest import TimeSeriesSplitter, BackTester
from orbit.diagnostics.metrics import smape, wmape, mape, mse, mae, rmsse
from orbit.diagnostics.metrics import smape, wmape, mae
from orbit.models import LGT, KTRLite

@pytest.mark.parametrize(
Expand Down Expand Up @@ -189,6 +189,8 @@ def test_backtester_with_training_data(iclaims_training_data):
date_col='week',
seasonality=1,
verbose=False,
n_bootstrap_draws=100,
prediction_percentiles=[10, 90],
estimator='stan-map'
)

Expand All @@ -201,12 +203,16 @@ def test_backtester_with_training_data(iclaims_training_data):
)

backtester.fit_predict()
out = backtester.get_predicted_df()
eval_out = backtester.score(include_training_metrics=True)

expected_out_columns = ['date', 'split_key', 'training_data', 'actual',
'prediction', 'prediction_10', 'prediction_90']
assert set(out.columns.tolist()) == set(expected_out_columns)

evaluated_test_metrics = set(eval_out.loc[~eval_out['is_training_metric'], 'metric_name'].tolist())
evaluated_train_metrics = set(eval_out.loc[eval_out['is_training_metric'], 'metric_name'].tolist())

expected_test_metrics = [x.__name__ for x in backtester._default_metrics]

expected_train_metrics = list(filter(
lambda x: backtester._get_metric_callable_signature(x) == {'actual', 'prediction'}, backtester._default_metrics)
)
Expand Down

0 comments on commit 2e9baaf

Please sign in to comment.