# Local-Global-Trend (LGT) Example

In [18]:
import pandas as pd
import numpy as np
from orbit.models.lgt import LGTMAP, LGTAggregated, LGTFull
# from orbit.utils.plot import plot_predicted_data
# from orbit.utils.plot import plot_predicted_components

## Data

*iclaims_example* is a dataset containing the weekly initial claims for US unemployment benefits alongside a few related Google Trends queries from Jan 2010 - June 2018. It aims to mimic the dataset from the paper "Predicting the Present with Bayesian Structural Time Series" by Scott and Varian (2014).

The number of claims is obtained from the [Federal Reserve Bank of St. Louis](https://fred.stlouisfed.org/series/ICNSA), while the Google queries are obtained through the [Google Trends API](https://trends.google.com/trends/?geo=US).

In [5]:
# Relative path to the example dataset; it is resolved against the kernel's
# current working directory.
DATA_FILE = "./data/iclaims_example.csv"

In [6]:
# Parse `week` as datetime on load; it is passed to the models as `date_col` below.
raw_df = pd.read_csv(DATA_FILE, parse_dates=['week'])

In [7]:
# Sanity check: `week` should come back as datetime64 after `parse_dates`.
raw_df.dtypes

week              datetime64[ns]
claims                     int64
trend.unemploy           float64
trend.filling            float64
trend.job                float64
dtype: object

In [8]:
# Preview the first five rows of the raw frame.
raw_df.head(5)

Unnamed: 0,week,claims,trend.unemploy,trend.filling,trend.job
0,2010-01-03,651215,1.183973,0.72014,1.119669
1,2010-01-10,825891,1.183973,0.814896,1.178599
2,2010-01-17,659173,1.203382,0.739091,1.119669
3,2010-01-24,507651,1.164564,0.814896,1.107883
4,2010-01-31,538617,1.086926,0.776993,1.072525


### Train / Test Split

In [9]:
# Hold out the last `test_size` rows as the test set and train on the rest.
# The split is purely positional, so it assumes the rows are already in
# chronological order (the CSV preview above is sorted by `week`).
df = raw_df.copy()
test_size = 52  # number of trailing rows (weekly observations) held out

# Guard the slice arithmetic: with test_size == 0, `[:-0]` is empty and `[-0:]`
# is the whole frame, which would silently swap the roles of train and test;
# a test_size >= len(df) would leave nothing to train on.
if not 0 < test_size < len(df):
    raise ValueError(
        f"test_size must be between 1 and {len(df) - 1}, got {test_size}"
    )

# Explicit positional indexing; equivalent to the plain `df[...]` slices.
train_df = df.iloc[:-test_size]
test_df = df.iloc[-test_size:]

In [10]:
# Preview the first five rows of the training split.
train_df.head(5)

Unnamed: 0,week,claims,trend.unemploy,trend.filling,trend.job
0,2010-01-03,651215,1.183973,0.72014,1.119669
1,2010-01-10,825891,1.183973,0.814896,1.178599
2,2010-01-17,659173,1.203382,0.739091,1.119669
3,2010-01-24,507651,1.164564,0.814896,1.107883
4,2010-01-31,538617,1.086926,0.776993,1.072525


## MAP Fit and Predict

In [12]:
# LGTMAP model on the weekly `claims` series, without regressors.
lgt_map = LGTMAP(
    date_col="week",
    response_col="claims",
    is_multiplicative=True,
    # 52 periods per cycle; presumably yearly seasonality on weekly data.
    seasonality=52,
    # Fixed seed; presumably for reproducible reruns (confirm in orbit docs).
    seed=8888,
)

In [13]:
%%time
# Fit on the training split only; the rows in `test_df` are not passed in.
lgt_map.fit(df=train_df)

CPU times: user 108 ms, sys: 13.7 ms, total: 122 ms
Wall time: 516 ms


In [14]:
# Predict for the held-out rows in `test_df`.
# NOTE: `predicted_df` is reassigned by every later section of this notebook.
predicted_df = lgt_map.predict(df=test_df)

In [16]:
# Show the last three rows of the MAP prediction.
predicted_df.tail(3)

Unnamed: 0,week,prediction
49,2018-06-10,210191.544272
50,2018-06-17,203527.624806
51,2018-06-24,213169.074378


In [15]:
# plot_predicted_data(training_actual_df=train_df, predicted_df=predicted_df, 
#                     date_col=lgt_map.date_col, actual_col=lgt_map.response_col, pred_col='prediction', 
#                     test_actual_df=test_df)

NameError: name 'plot_predicted_data' is not defined

In [12]:
# test_df.head(5)

Unnamed: 0,week,claims,trend.unemploy,trend.filling,trend.job
391,2017-07-02,252886,1.183973,1.099162,0.919307
392,2017-07-09,284329,1.222792,1.023357,1.025381
393,2017-07-16,257763,1.145154,1.061259,0.990023
394,2017-07-23,220455,1.125745,1.023357,1.025381
395,2017-07-30,198776,1.145154,1.042308,1.048953


## MCMC Fit and Predict

In [20]:
# LGTFull model for the MCMC section, on the same `claims` / `week` columns
# as the MAP model.
lgt = LGTFull(
    date_col="week",
    response_col="claims",
    seasonality=52,
    seed=8888,
    # Sampler settings as named by the constructor: warm-up and sample counts.
    num_warmup=4000,
    num_sample=500,
    # NOTE(review): the meaning of -1 is not visible in this notebook;
    # confirm against the orbit documentation.
    n_bootstrap_draws=-1,
)

In [21]:
%%time
# Fit on the training split only; the rows in `test_df` are not passed in.
lgt.fit(df=train_df)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


CPU times: user 138 ms, sys: 156 ms, total: 294 ms
Wall time: 21.6 s


In [22]:
# NOTE(review): this predicts over the full frame `df` (training rows plus the
# held-out rows), whereas the MAP and aggregated sections predict on `test_df`
# only. Confirm the difference is intentional.
predicted_df = lgt.predict(df=df)

In [23]:
# Show the last three rows; the prediction was made on the full frame `df`,
# so the row labels here are those of `df` rather than of `test_df`.
predicted_df.tail(3)

Unnamed: 0,week,prediction
440,2018-06-10,212994.00885
441,2018-06-17,206220.015911
442,2018-06-24,215084.340358


In [24]:
# plot_predicted_data(training_actual_df=train_df, predicted_df=predicted_df,
#                     date_col=lgt.date_col, actual_col=lgt.response_col, pred_col=50,
#                     pred_quantiles_col=[5,95], test_actual_df=test_df)

## Mean Fit and Predict

In [30]:
# LGTAggregated model for the "Mean Fit" section, with the same columns,
# seasonality, seed and sampler settings as the LGTFull model above.
lgt_agg = LGTAggregated(
    date_col="week",
    response_col="claims",
    seasonality=52,
    seed=8888,
    num_warmup=4000,
    num_sample=500,
)

In [31]:
%%time
# Fit on the training split only; the rows in `test_df` are not passed in.
lgt_agg.fit(df=train_df)

To run all diagnostics call pystan.check_hmc_diagnostics(fit)


CPU times: user 157 ms, sys: 177 ms, total: 334 ms
Wall time: 24.6 s


In [32]:
# Predict for the held-out rows in `test_df` (this overwrites the earlier
# `predicted_df`).
predicted_df = lgt_agg.predict(df=test_df)

In [33]:
# Show the last three rows of the aggregated-model prediction.
predicted_df.tail(3)

Unnamed: 0,week,prediction
49,2018-06-10,203619.282037
50,2018-06-17,197076.808814
51,2018-06-24,206350.234325


In [29]:
# plot_predicted_data(training_actual_df=train_df, predicted_df=predicted_df,
#                     date_col=lgt_agg.date_col, actual_col=lgt_agg.response_col, pred_col='prediction',
#                     test_actual_df=test_df)

## Fit with Regressors & Predict with Decomp

### Fit with Regressors

In [34]:
# LGTMAP model with two of the Google Trends columns as regressors;
# `trend.job` is not included.
lgt_reg = LGTMAP(
    date_col="week",
    response_col="claims",
    regressor_col=["trend.unemploy", "trend.filling"],
    seasonality=52,
    seed=8888,
)

In [35]:
%%time
# Fit on the training split; the regressor columns are read from the same frame.
lgt_reg.fit(df=train_df)

CPU times: user 178 ms, sys: 10.6 ms, total: 189 ms
Wall time: 204 ms


### Decompose Prediction

We can use `decompose=True` to decompose the prediction into `trend`, `seasonality` and `regression` components. This option only works with point estimates.

In [37]:
# Predict over the full frame `df` (training rows plus the held-out weeks) and
# ask for the component breakdown alongside the overall prediction.
predicted_df = lgt_reg.predict(df=df, decompose=True)
predicted_df.head(3)

Unnamed: 0,week,prediction,trend,seasonality,regression
0,2010-01-03,651215.0,495113.989912,1.338897,0.982363
1,2010-01-10,826286.13504,483623.583515,1.722778,0.991731
2,2010-01-17,650852.245373,481960.580029,1.370954,0.985027


In [None]:
# plot_predicted_components(predicted_df=predicted_df, date_col='week')