In [1]:
import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

##### Read and Prepare Data

In [2]:
# Location of the daily M5 sales CSV.
# Set the M5_DAY_CSV environment variable to point at your own copy; when it is
# not set, the original hard-coded path is used so existing setups keep working.
M5_DAY_CSV = os.environ.get(
    'M5_DAY_CSV',
    '/home/filtheo/DeepRetail/DeepRetail/data/data_folder/m5_day.csv',
)

# Read data: the first CSV column becomes the index (unique_id),
# the remaining columns are one per calendar day.
df = pd.read_csv(M5_DAY_CSV, index_col=0)
df.head()

Unnamed: 0_level_0,2011-01-29,2011-01-30,2011-01-31,2011-02-01,2011-02-02,2011-02-03,2011-02-04,2011-02-05,2011-02-06,2011-02-07,...,2016-04-15,2016-04-16,2016-04-17,2016-04-18,2016-04-19,2016-04-20,2016-04-21,2016-04-22,2016-04-23,2016-04-24
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HOBBIES_1_001_CA_1,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
HOBBIES_1_002_CA_1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
HOBBIES_1_003_CA_1,0,0,0,0,0,0,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
HOBBIES_1_004_CA_1,0,0,0,0,0,0,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
HOBBIES_1_005_CA_1,0,0,0,0,0,0,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [3]:
# Get the monthly frequency for simplicity: daily columns are summed into
# month-end buckets.
# The column labels are date strings when read from CSV, so convert them first.
df.columns = pd.to_datetime(df.columns)
# `DataFrame.resample(..., axis=1)` is deprecated since pandas 2.1. Resampling
# the transposed frame (dates on the row index) and transposing back yields the
# same unique_id x month-end layout without relying on the deprecated argument.
df = df.T.resample('M').sum().T

In [4]:
# Get a small sample of three series.
# A fixed seed makes the selection (and therefore every forecast shown further
# down) reproducible after Restart Kernel -> Run All.
SEED = 42
sampled_df = df.sample(3, random_state=SEED)
sampled_df.head()

Unnamed: 0_level_0,2011-01-31,2011-02-28,2011-03-31,2011-04-30,2011-05-31,2011-06-30,2011-07-31,2011-08-31,2011-09-30,2011-10-31,...,2015-07-31,2015-08-31,2015-09-30,2015-10-31,2015-11-30,2015-12-31,2016-01-31,2016-02-29,2016-03-31,2016-04-30
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HOUSEHOLD_2_355_WI_3,0,3,8,7,4,9,7,3,6,9,...,11,5,4,3,8,5,5,4,11,4
HOUSEHOLD_1_188_CA_4,0,0,0,0,0,0,0,0,10,14,...,17,16,22,17,19,27,29,26,28,22
FOODS_3_127_CA_1,17,113,97,116,139,126,101,58,76,87,...,183,123,113,101,98,136,81,102,170,117


##### Use THieF

In [5]:
from DeepRetail.reconciliation.temporal import THieF

  from tqdm.autonotebook import tqdm


In [6]:
# Settings shared by all examples in this notebook:
#   model             - name of the base forecasting model handed to `predict`
#   bottom_level_freq - frequency alias of the input data ('M', matching the
#                       monthly resampling done when preparing the data)
model = 'ETS'
bottom_level_freq = 'M'

#### Example 1: No Holdout

In [14]:
# Example 1 configuration: no holdout set, and no cross-validation value.
holdout, cv = False, None

In [15]:
# Instantiate THieF with the bottom-level frequency and the holdout flag
# chosen for this example.
thief = THieF(
    bottom_level_freq=bottom_level_freq,
    holdout=holdout,
)

In [16]:
# Fit THieF on the sampled series. The frame is passed with format 'pivoted':
# as displayed above, it has one row per unique_id and one column per month.
thief.fit(
    sampled_df,
    format='pivoted',
)

In [17]:
# Produce the base forecasts with the model named in the parameters cell, then
# keep a handle on the residuals attribute of the fitted object.
# NOTE(review): the residuals are read after `predict`, presumably because
# `predict` is what populates them - confirm before reordering these lines.
base_fc_1 = thief.predict(model)
res_1 = thief.base_forecast_residuals

In [18]:
# Reconcile the base forecasts using the 'struc' method.
method = 'struc'
rec_fc_1 = thief.reconcile(
    method=method,
)

In [19]:
# Preview the first five reconciled rows.
rec_fc_1.head(n=5)

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base
0,1457.657633,FOODS_3_127_CA_1,TR-struc-AutoETS,12,1,1470.0
1,260.147553,HOUSEHOLD_1_188_CA_4,TR-struc-AutoETS,12,1,252.0
2,67.981082,HOUSEHOLD_2_355_WI_3,TR-struc-AutoETS,12,1,66.0
3,729.885074,FOODS_3_127_CA_1,TR-struc-AutoETS,6,1,704.006226
4,130.073777,HOUSEHOLD_1_188_CA_4,TR-struc-AutoETS,6,1,126.195267


#### Example 2: Holdout & CV

In [8]:
# Example 2 configuration: keep a holdout set and use cv = 3.
holdout, cv = True, 3

In [9]:
# Instantiate THieF with holdout enabled and the cv value set above
# (presumably the number of cross-validation folds - confirm against the
# THieF documentation).
thief = THieF(
    bottom_level_freq=bottom_level_freq,
    cv=cv,
    holdout=holdout,
)

# Fit on the same sampled series, again supplied in the pivoted layout.
thief.fit(
    sampled_df,
    format='pivoted',
)

# Base forecasts first, then the residuals attribute of the fitted object.
base_fc_2 = thief.predict(model)
res_2 = thief.base_forecast_residuals

# Reconcile the base forecasts, this time with the 'mse' method.
method = 'mse'
reconciled_2 = thief.reconcile(
    method=method,
)

In [10]:
# Preview the first five reconciled rows of the holdout/CV run.
reconciled_2.head(n=5)

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base,cv,date,y_true
0,1576.88469,FOODS_3_127_CA_1,TR-mse-AutoETS,12,1,1571.0,1,2014-12-31,
1,219.677717,HOUSEHOLD_1_188_CA_4,TR-mse-AutoETS,12,1,221.0,1,2014-12-31,
2,86.616381,HOUSEHOLD_2_355_WI_3,TR-mse-AutoETS,12,1,83.0,1,2014-12-31,
3,788.329069,FOODS_3_127_CA_1,TR-mse-AutoETS,6,1,819.993103,1,2015-03-31,823.0
4,109.838858,HOUSEHOLD_1_188_CA_4,TR-mse-AutoETS,6,1,120.125,1,2015-03-31,112.0


##### Use the Temporal Reconciler

In [11]:
from DeepRetail.reconciliation.temporal import TemporalReconciler

In [33]:
# Example 1:
# Reconcile without a holdout set,
# using structural scaling.
holdout, cv = False, None

In [34]:
# Instantiate the TemporalReconciler with the bottom-level frequency and the
# holdout flag chosen for this example.
temporal_reconciler = TemporalReconciler(
    bottom_level_freq=bottom_level_freq,
    holdout=holdout,
)

In [35]:
# Fit the reconciler on the base forecasts that THieF produced in its
# Example 1 (`base_fc_1`).
temporal_reconciler.fit(base_fc_1)

In [36]:
# Reconcile the fitted base forecasts with the 'struc' (structural scaling) method.
reconciled = temporal_reconciler.reconcile('struc')

In [37]:
# Preview the first five rows; in the saved output they are identical to the
# rows returned by `thief.reconcile` in THieF's Example 1.
reconciled.head(n=5)

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base
0,1457.657633,FOODS_3_127_CA_1,TR-struc-AutoETS,12,1,1470.0
1,260.147553,HOUSEHOLD_1_188_CA_4,TR-struc-AutoETS,12,1,252.0
2,67.981082,HOUSEHOLD_2_355_WI_3,TR-struc-AutoETS,12,1,66.0
3,729.885074,FOODS_3_127_CA_1,TR-struc-AutoETS,6,1,704.006226
4,130.073777,HOUSEHOLD_1_188_CA_4,TR-struc-AutoETS,6,1,126.195267


In [38]:
# Example 2:
# Reconcile with a holdout set,
# using the mse method.
holdout, cv = True, 3

In [40]:
# Instantiate the TemporalReconciler with holdout enabled and the cv value
# set above.
temporal_reconciler = TemporalReconciler(
    bottom_level_freq=bottom_level_freq,
    holdout=holdout,
    cv=cv,
)

# Fit the reconciler on the base forecasts from THieF's holdout/CV run.
temporal_reconciler.fit(base_fc_2)

# Reconcile with 'mse'; the residuals captured from THieF (`res_2`) are
# supplied explicitly through `residual_df`.
reconciled = temporal_reconciler.reconcile(
    'mse',
    residual_df=res_2,
)

In [41]:
# Preview the first five rows; in the saved output they are identical to the
# rows returned by `thief.reconcile` in THieF's Example 2.
reconciled.head(n=5)

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base,cv,date,y_true
0,1576.88469,FOODS_3_127_CA_1,TR-mse-AutoETS,12,1,1571.0,1,2014-12-31,
1,219.677717,HOUSEHOLD_1_188_CA_4,TR-mse-AutoETS,12,1,221.0,1,2014-12-31,
2,86.616381,HOUSEHOLD_2_355_WI_3,TR-mse-AutoETS,12,1,83.0,1,2014-12-31,
3,788.329069,FOODS_3_127_CA_1,TR-mse-AutoETS,6,1,819.993103,1,2015-03-31,823.0
4,109.838858,HOUSEHOLD_1_188_CA_4,TR-mse-AutoETS,6,1,120.125,1,2015-03-31,112.0
