In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt

##### Read and Prepare Data

In [2]:
# Load the daily sales table; the first CSV column is used as the row index
DATA_PATH = '/home/filtheo/DeepRetail/DeepRetail/data/data_folder/m5_day.csv'
df = pd.read_csv(DATA_PATH, index_col=0)
df.head()

Unnamed: 0_level_0,2011-01-29,2011-01-30,2011-01-31,2011-02-01,2011-02-02,2011-02-03,2011-02-04,2011-02-05,2011-02-06,2011-02-07,...,2016-04-15,2016-04-16,2016-04-17,2016-04-18,2016-04-19,2016-04-20,2016-04-21,2016-04-22,2016-04-23,2016-04-24
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HOBBIES_1_001_CA_1,0,0,0,0,0,0,0,0,0,0,...,1,3,0,1,1,1,3,0,1,1
HOBBIES_1_002_CA_1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
HOBBIES_1_003_CA_1,0,0,0,0,0,0,0,0,0,0,...,2,1,2,1,1,1,0,1,1,1
HOBBIES_1_004_CA_1,0,0,0,0,0,0,0,0,0,0,...,1,0,5,4,1,0,1,3,7,2
HOBBIES_1_005_CA_1,0,0,0,0,0,0,0,0,0,0,...,2,1,1,0,1,1,2,2,2,4


In [3]:
# Get the monthly frequency for simplicity
df.columns = pd.to_datetime(df.columns)
# `resample(..., axis=1)` is deprecated in pandas 2.x: resample the transposed
# frame (dates on the index) and transpose back so series stay on the rows.
df = df.T.resample('M').sum().T

In [4]:
# Get a small sample
# A fixed seed keeps the drawn series (and every downstream output) stable
# across Restart & Run All.
sampled_df = df.sample(15, random_state=42)
sampled_df.head()

Unnamed: 0_level_0,2011-01-31,2011-02-28,2011-03-31,2011-04-30,2011-05-31,2011-06-30,2011-07-31,2011-08-31,2011-09-30,2011-10-31,...,2015-07-31,2015-08-31,2015-09-30,2015-10-31,2015-11-30,2015-12-31,2016-01-31,2016-02-29,2016-03-31,2016-04-30
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
HOUSEHOLD_1_460_CA_1,10,100,79,48,47,36,54,53,51,71,...,19,17,21,19,22,32,22,24,50,34
FOODS_3_712_WI_2,2,65,71,39,24,15,12,14,17,26,...,28,11,9,46,37,34,44,37,40,46
HOUSEHOLD_2_326_CA_1,3,7,10,12,18,11,0,0,5,14,...,7,10,9,11,9,6,2,7,12,7
HOBBIES_1_235_WI_1,0,0,0,0,22,77,110,156,140,73,...,63,63,56,65,55,53,50,55,53,44
FOODS_3_758_WI_2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,13,5,2,8,8,9,8


In [5]:
# Will use statsforecast!!
from statsforecast import StatsForecast
from statsforecast.models import AutoETS, AutoARIMA, Naive, SeasonalNaive
import numpy as np
import warnings
from DeepRetail.forecasting.utils import get_numeric_frequency
from DeepRetail.transformations.formats import sktime_forecast_format, transaction_df

  from tqdm.autonotebook import tqdm


In [6]:
def statsforecast_forecast_format(df, format = 'transaction'):
    """
    Converts a dataframe to the format required for forecasting with statsforecast.

    The only change applied to transactional data is renaming the 'date'
    column to 'ds'. Pivoted data is first converted to the transactional
    layout with ``transaction_df(df, drop_zeros=False)``.

    Args:
        df : pd.DataFrame
            The input data.
        format : str, default='transaction'
            The format of the input data. Accepts 'transaction' / 'transactional'
            for the long layout and 'pivoted' / 'pivotted' for the wide layout.
            Both spellings are accepted so the function agrees with the
            'pivoted' / 'transactional' names used by the rest of the file
            and by its own error message.

    Returns:
        df : pd.DataFrame
            The formatted dataframe (a new object; the input is not modified
            by the rename).

    Raises:
        ValueError : If the format is not one of the accepted names.

    """

    if format in ('transaction', 'transactional'):
        # Already in the long layout
        long_df = df
    elif format in ('pivotted', 'pivoted'):
        # Wide layout: convert it to the long layout first
        long_df = transaction_df(df, drop_zeros = False)
    else:
        raise ValueError('Provide the dataframe either in pivoted or transactional format.')

    # statsforecast expects the time column to be called 'ds'
    return long_df.rename(columns = {'date': 'ds'})

In [8]:
class StatisticalForecaster(object):

    """
    A class for time series forecasting using statistical methods.

    It wraps a statsforecast ``StatsForecast`` object and reshapes its output
    into a long dataframe with one row per (unique_id, date, Model).

    Parameters:
        models : list
            A list of model names to use for forecasting.
            Currently only 'Naive', 'SNaive', 'ARIMA' and 'ETS' are supported.
        freq : str
            The frequency of the time series data.
        n_jobs : int, optional (default=-1)
            The number of parallel jobs to run during model fitting.
        warning : bool, optional (default=False)
            Whether to show warnings or not.
        seasonal_length : int, optional (default=None)
            The length of the seasonal pattern. Inferred from freq when None.

    Attributes:
        freq : str
            The frequency of the data.
        seasonal_length : int
            The length of the seasonal pattern.
        n_jobs : int
            The number of jobs to run in parallel for the fitting process.
        fitted_models : list
            The statsforecast model objects built from the requested names.
        model_names : list
            The display names of the selected models.
        forecaster : StatsForecast
            The underlying statsforecast object (set by fit).
        fc_df : pd.DataFrame
            The input data in the long format used for forecasting (set by fit).
        h : int
            The forecast horizon (set by predict).
        cv : int
            The number of cross-validation folds (set by predict).
        holdout, step_size, refit
            The remaining arguments of the last predict call.
        forecast_df : pd.DataFrame
            The forecast dataframe, including predicted values and any available true values.
        residuals : pd.DataFrame
            The output of calculate_residuals (set once it has been called).

    Methods:
        fit(df, format='pivoted', fallback=True, verbose=False)
            Prepares the data and defines the forecaster.
        predict(h, cv=1, step_size=1, refit=True, holdout=True)
            Generates forecasts for a future period.
        add_fh_cv()
            Adds the forecasting horizon and cross-validation fold numbers to the forecast DataFrame.
        calculate_residuals()
            Computes fitted values over rolling windows for residual analysis.
        residual_diagnosis(model, type, agg_func=None, n=1, index_ids=None)
            Plots residuals, their ACF and a histogram.


    Examples:
        # Create the forecaster
        >>> models = ["ETS"]
        >>> freq = "M"
        >>> n_jobs = -1
        >>> forecaster = StatisticalForecaster(models, freq, n_jobs)

        # Fit the forecaster
        >>> df = pd.read_csv("data.csv")
        >>> forecaster.fit(df, format="pivoted")

        # Generate predictions
        >>> h = 12
        >>> cv = 3
        >>> holdout = True
        >>> predictions = forecaster.predict(h, cv, holdout=holdout)


    """

    def __init__(self, models, freq, n_jobs=-1, warning=False, seasonal_length=None):
        """
        Initialize the StatisticalForecaster object.

        Args:
            models: list
                A list of model names to fit. Recognised names are 'Naive',
                'SNaive', 'ARIMA' and 'ETS'; any other entry is ignored.
            freq: str
                The frequency of the data, e.g. 'D' for daily or 'M' for monthly.
            n_jobs: int, default=-1
                The number of jobs to run in parallel for the fitting process.
            warning: bool, default=False
                Whether to show warnings or not. When False, warnings are
                silenced for the whole Python process, not only for this object.
            seasonal_length: int, default=None
                The length of the seasonal pattern.
                If None, the seasonal length is inferred from the frequency.
                On frequencies with multiple seasonal patterns, the first seasonal pattern is used.

        """
        self.freq = freq
        if seasonal_length is not None:
            self.seasonal_length = seasonal_length
        else:
            self.seasonal_length = get_numeric_frequency(freq)
            # Check if it returns multiple seasonal lengths
            if isinstance(self.seasonal_length, list):
                # take the first
                self.seasonal_length = self.seasonal_length[0]
        self.n_jobs = n_jobs

        # Set the warnings
        if not warning:
            warnings.filterwarnings("ignore")

        # Add the models and their names
        models_to_fit = []
        model_names = []

        # Append to the list
        if "Naive" in models:
            models_to_fit.append(Naive())
            model_names.append("Naive")
        if "SNaive" in models:
            models_to_fit.append(
                SeasonalNaive(season_length=self.seasonal_length)
            )
            model_names.append("Seasonal Naive")
        if "ARIMA" in models:
            models_to_fit.append(
                AutoARIMA(season_length=self.seasonal_length)
            )
            model_names.append("ARIMA")
        if "ETS" in models:
            models_to_fit.append(
                AutoETS(season_length=self.seasonal_length)
            )
            model_names.append("ETS")

        self.fitted_models = models_to_fit
        self.model_names = model_names

    def fit(self, df, format="pivoted", fallback = True, verbose = False):
        """
        Prepare the data and define the underlying StatsForecast object.

        Args:
            df : pd.DataFrame
                The input data.
            format : str, default='pivoted'
                The format of the input data. Can be 'pivoted' or 'transactional'.
            fallback : bool, default=True
                Whether to fallback to the default model if the model fails to fit.
                Default selection is Naive
            verbose : bool, default=False
                Whether to show the progress of the model fitting.
                Forwarded to StatsForecast.
        Raises:
            ValueError : If the format is not 'pivoted' or 'transactional'.

        """

        if format == "pivoted":
            fc_df = transaction_df(df, drop_zeros=False)
        elif format == "transactional":
            fc_df = df.copy()
        else:
            raise ValueError(
                "Provide the dataframe either in pivoted or transactional format."
            )

        # convert to the right format for forecasting
        fc_df = statsforecast_forecast_format(fc_df)

        # Define the StatsForecaster.
        # fallback_model=None is what StatsForecast uses when the argument is
        # omitted, so a single call covers both the fallback and no-fallback case.
        self.forecaster = StatsForecast(
            df = fc_df,
            models = self.fitted_models,
            freq = self.freq,
            n_jobs = self.n_jobs,
            fallback_model = Naive() if fallback else None,
            verbose = verbose,
        )

        # Add to the object
        self.fc_df = fc_df

    def predict(self, h, cv=1, step_size = 1, refit = True, holdout=True):
        """
        Generates predictions using the statistical forecaster.

        Args:
            h : int
                The forecast horizon (i.e., how many time periods to forecast into the future).
            cv : int, optional (default=1)
                The number of cross-validation folds to use. If set to 1, no cross-validation is performed.
                None is treated as 1.
            step_size : int, optional (default=1)
                The number of periods between two consecutive cross-validation windows.
                With step_size < h the windows overlap.
            refit : bool, optional (default=True)
                Whether to refit the model at each cross-validation. Avoid for big datasets.
            holdout : bool, optional (default=True)
                If True, a holdout set is used for testing the model. If False, the model is fit on the entire data.

        Raises:
            ValueError : If cv > 1 and holdout is False.

        Returns:
            pandas.DataFrame
            The forecasted values, along with the true values (if holdout=True).

        """
        # Resolve a missing cv before comparing it: `None > 1` is a TypeError
        if cv is None:
            cv = 1

        if not holdout and cv > 1:
            raise ValueError("Cannot do cross validation without holdout.")

        # Add to the object
        self.cv = cv
        self.h = h
        self.holdout = holdout
        self.step_size = step_size
        self.refit = refit

        if holdout:
            # Get the cross_validation
            y_pred = self.forecaster.cross_validation(
                df = self.fc_df,
                h = self.h,
                step_size = self.step_size,
                n_windows = self.cv,
                refit = self.refit
            )

            # edit the format
            # Reset index and rename
            y_pred = y_pred.reset_index().rename(columns = {"ds": "date", 'y': 'True'})
            # Melt: one row per (series, date, fold, model)
            y_pred = pd.melt(
                y_pred,
                id_vars = ['unique_id', 'date', 'cutoff', 'True'],
                var_name = 'Model',
                value_name = 'y',
            )

        else:
            # We just forecast
            y_pred = self.forecaster.forecast(
                df = self.fc_df,
                h = self.h
            )

            # edit the format
            # Reset index and rename
            y_pred = y_pred.reset_index().rename(columns = {"ds": "date"})
            # Melt: one row per (series, date, model)
            y_pred = pd.melt(
                y_pred,
                id_vars = ['unique_id', 'date'],
                var_name = 'Model',
                value_name = 'y',
            )

        # Add to the object
        self.forecast_df = y_pred

        # add the fh and cv
        self.add_fh_cv()

        # return
        return self.forecast_df

    def add_fh_cv(self):
        """
        Adds the forecasting horizon and cross-validation information to the forecast results.

        Writes two columns on ``self.forecast_df``:
            - 'cv': the 1-based position of the row's cutoff among the sorted
              unique cutoffs (None when no holdout is used).
            - 'fh': the 1-based position of the row's date among the sorted
              unique dates of the same fold.

        Args:
            None

        """

        if self.holdout:
            # Fold number: rank of the cutoff among all cutoffs
            self.forecast_df["cv"] = (
                pd.factorize(self.forecast_df["cutoff"], sort=True)[0] + 1
            )
            # Horizon: rank of the date *within its own fold*. Ranking over all
            # folds at once would number the dates 1..(h + shift) and give wrong
            # horizons as soon as more than one window is evaluated.
            self.forecast_df["fh"] = self.forecast_df.groupby("cutoff")[
                "date"
            ].transform(lambda dates: pd.factorize(dates, sort=True)[0] + 1)
        else:
            # A single forecast window: the horizon is the rank of the date
            self.forecast_df["fh"] = (
                pd.factorize(self.forecast_df["date"], sort=True)[0] + 1
            )
            # also add the cv
            self.forecast_df["cv"] = None

    def calculate_residuals(self):
        """
        Calculate residuals for all horizons.

        Runs a rolling cross-validation (without refitting) with every selected
        model except Naive and Seasonal Naive and collects the fitted values.
        Reads ``self.h`` and ``self.cv``, so predict must have been called first.

        Args:
            None

        Raises:
            ValueError : If no model other than Naive / Seasonal Naive was selected.

        Returns:
            pandas.DataFrame : The fitted values ('y_pred') and true values
            ('y_true') for all models, with 'cv' and 'fh' columns.

        """

        # Number of trailing periods reserved for the evaluation windows
        end_date = self.h + self.cv - 1

        # Get the fitting period
        fitting_periods = sorted(self.fc_df['ds'].unique())[:-end_date]
        # I am adding a manual threshold to avoid an error when the number of fitting periods is too small
        total_windows = min(40, len(fitting_periods) - self.h + 1)

        # Remove Naive and Seasonal Naive from the models to avoid an error
        temp_models = [
            model
            for model in self.fitted_models
            if model.alias not in ('Naive', 'SeasonalNaive')
        ]
        if not temp_models:
            raise ValueError(
                "Residuals are not available for Naive and Seasonal Naive models."
            )

        # define a new forecaster
        # NOTE(review): the forecaster is built on the full self.fc_df, not only
        # on the rows inside `fitting_periods` -- confirm this is intended.
        forecaster_residuals = StatsForecast(
            models = temp_models,
            df = self.fc_df,
            freq = self.freq,
            n_jobs = self.n_jobs,
            verbose = False,
        )

        # Fit
        _ = forecaster_residuals.cross_validation(
            h = self.h,
            n_windows = total_windows,
            input_size = self.seasonal_length,
            refit = False,
            fitted = True,
        )

        # Get the residuals
        res = forecaster_residuals.cross_validation_fitted_values()

        # Convert to the right format

        # Reset index and rename
        res = res.reset_index().rename(columns = {"ds": "date", 'y': 'y_true'})

        # Melt
        res = pd.melt(
            res,
            id_vars = ['unique_id', 'date', 'cutoff', 'y_true'],
            var_name = 'Model',
            value_name = 'y_pred',
        )

        # add the number of cv and fh
        cv_vals = sorted(res["cutoff"].unique())
        cv_dict = dict(zip(cv_vals, np.arange(1, len(cv_vals) + 1)))
        res["cv"] = [cv_dict[date] for date in res["cutoff"].values]

        # add the fh
        # NOTE(review): assumes the rows come in consecutive runs of h per
        # window and that len(res) is a multiple of h -- verify.
        fh_vals = np.tile(np.arange(1, self.h + 1), int(len(res) / self.h))
        res["fh"] = fh_vals

        # add the residuals
        self.residuals = res

        # return
        return self.residuals

    @staticmethod
    def _acf_with_confint(y, nlags, alpha=0.05):
        """
        Sample autocorrelation of y for lags 0..nlags with Bartlett confidence bounds.

        Args:
            y : array-like
                The series.
            nlags : int
                The largest lag to return.
            alpha : float, default=0.05
                The significance level of the confidence interval.

        Returns:
            tuple : (acf, confint) where acf has one value per lag (starting
            at lag 0) and confint has one (lower, upper) row per lag.

        """
        from statistics import NormalDist

        y = np.asarray(y, dtype=float)
        nobs = y.shape[0]
        centered = y - y.mean()
        # Autocovariance at lags 0..nobs-1, divided by nobs at every lag
        acov = np.correlate(centered, centered, mode="full")[nobs - 1:] / nobs
        acf_vals = acov[: nlags + 1] / acov[0]

        # Bartlett's formula for the variance of the sample autocorrelation
        var = np.ones_like(acf_vals) / nobs
        var[0] = 0.0
        if len(var) > 1:
            var[1] = 1.0 / nobs
            var[2:] *= 1 + 2 * np.cumsum(acf_vals[1:-1] ** 2)
        half_width = NormalDist().inv_cdf(1 - alpha / 2.0) * np.sqrt(var)
        confint = np.column_stack((acf_vals - half_width, acf_vals + half_width))
        return acf_vals, confint

    def residual_diagnosis(self, model, type, agg_func=None, n=1, index_ids=None):
        """
        Plots the residuals for a given model together with the ACF plot and a histogram.

        One figure is produced per selected series. Only the 1-step-ahead
        residuals (fh == 1) are used.

        Args:
            model : str
                The name of the model to use. Rows are selected when their
                'Model' value contains this string.
            type : str
                The type of residuals to plot. Can be 'aggregate', 'random' or 'individual'.
                - Aggregate aggregates the residuals given the agg_fun
                - Random takes n random unique_ids
                - Individual takes the unique_ids provided in the index_ids list
            agg_func : str
                The function to use for aggregating the residuals. Only used if type is 'aggregate'.
            n : int
                The number of unique_ids to plot. Only used if type is 'random'.
                Capped at the number of available unique_ids.
            index_ids : list
                The list of unique_ids to plot. Only used if type is 'individual'.

        Raises:
            ValueError : If type is 'individual' and index_ids is not provided.

        """
        # matplotlib is already a dependency of this file; GridSpec is only
        # needed here, so it is imported locally.
        from matplotlib.gridspec import GridSpec

        # Get residuals if we haven't already; always work on a copy so the
        # stored residuals are not modified
        if not hasattr(self, 'residuals'):
            self.calculate_residuals()
        res = self.residuals.copy()

        # Add the residual
        res['residual'] = res['y_true'] - res['y_pred']
        self.temp = res
        # filter residuals for the given model
        f_res = res[res['Model'].str.contains(model)]

        # Convert the df to the right format
        # 1st: Keep only 1-step ahead residuals
        f_res = f_res[f_res["fh"] == 1]
        # 2nd: Drop columns and rename
        to_keep = ["date", "unique_id", "residual", "Model"]
        f_res = f_res[to_keep].rename(columns={"date": "Period"})

        # if we have to aggregate
        if type == "aggregate":
            # Aggregate the residual column only: unique_id holds strings and
            # cannot go through numeric aggregations
            f_res = (
                f_res.groupby(["Model", "Period"])["residual"]
                .agg(agg_func)
                .reset_index()
            )
            f_res["unique_id"] = "Aggregate"
        elif type == "random":
            # sample n distinct unique_ids (never more than are available)
            candidates = f_res["unique_id"].unique()
            ids = np.random.choice(
                candidates, size=min(n, len(candidates)), replace=False
            )
            f_res = f_res[f_res["unique_id"].isin(ids)]
        elif type == "individual":
            if index_ids is None:
                raise ValueError("Provide index_ids when type is 'individual'.")
            # take those provided on the index_ids list
            f_res = f_res[f_res["unique_id"].isin(index_ids)]

        # Pivot
        f_res = pd.pivot_table(
            f_res,
            index="unique_id",
            columns="Period",
            values="residual",
            aggfunc="first",
        )

        # Plot

        # Extract values, names and periods
        vals = f_res.values
        dates = f_res.columns.values
        names = f_res.index.values

        # Background shades
        line_gray = 0.9
        panel_gray = 0.93

        # Iterate over the rows that actually exist after filtering
        for name, y in zip(names, vals):
            fig = plt.figure(figsize=(16, 8), constrained_layout=True)
            gs = GridSpec(2, 2, figure=fig)

            # Define axes
            ax1 = fig.add_subplot(gs[0, :])
            ax2 = fig.add_subplot(gs[1, :-1])
            ax3 = fig.add_subplot(gs[1:, -1])

            # Ax1 has the line plot
            ax1.plot(dates, y, label="y", color="black")
            ax1.set_facecolor((line_gray, line_gray, line_gray))
            ax1.set_title("Residuals: " + str(name))
            ax1.grid()

            # Ax2 is the acf plot. Use the same seasonal length for the number
            # of lags and for the lag axis so both always have the same size.
            acf_x, confint = self._acf_with_confint(
                y, nlags=self.seasonal_length, alpha=0.05
            )
            # drop lag 0
            acf_x = acf_x[1:]
            confint = confint[1:]

            if len(acf_x) > 0:
                # Lags 1..k as floats, so the half-step offsets below survive
                lags_x = np.arange(1, len(acf_x) + 1, dtype=float)

                ax2.vlines(lags_x, [0], acf_x)
                ax2.axhline()
                ax2.margins(0.05)
                ax2.plot(
                    lags_x,
                    acf_x,
                    marker="o",
                    markersize=5,
                    markerfacecolor="red",
                    markeredgecolor="red",
                )

                # Setting the limits
                ax2.set_ylim(
                    1.25 * np.minimum(min(acf_x), min(confint[:, 0] - acf_x)),
                    1.25 * np.maximum(max(acf_x), max(confint[:, 1] - acf_x)),
                )

                # Widen the band by half a lag on both ends
                band_x = lags_x.copy()
                band_x[0] -= 0.5
                band_x[-1] += 0.5
                ax2.fill_between(
                    band_x, confint[:, 0] - acf_x, confint[:, 1] - acf_x, alpha=0.25
                )

            ax2.set_facecolor((panel_gray, panel_gray, panel_gray))
            ax2.set_title("ACF")
            ax2.grid()

            ax3.hist(y, color="black")
            ax3.grid()
            ax3.set_facecolor((panel_gray, panel_gray, panel_gray))
            ax3.set_title("Histogram")

            plt.show()

##### Use THieF

In [9]:
from DeepRetail.reconciliation.temporal import THieF

In [10]:
# Define parameters
bottom_level_freq = 'M'
model = 'ETS'

In [11]:
# Example 1:
# No holdout set
# Structural scaling as reconciliation method
holdout = False

In [12]:
# Define THieF
thief = THieF(bottom_level_freq = bottom_level_freq)

In [13]:
# fit thief
thief.fit(sampled_df, holdout = holdout, format = 'pivoted')

In [14]:
# predict base forecasts (also get residuals)
base_fc_1 = thief.predict(model)
res_1 = thief.base_forecast_residuals



In [15]:
# Reconcile base forecasts
reconciled_1 = thief.reconcile(reconciliation_method = 'struc')

In [16]:
reconciled_1

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base
0,434.355233,FOODS_2_173_CA_1,TR-struc-ETS,12,1,426.167627
1,236.655250,FOODS_2_270_CA_1,TR-struc-ETS,12,1,281.131529
2,309.659080,FOODS_3_183_WI_3,TR-struc-ETS,12,1,89.396315
3,6335.337239,FOODS_3_694_TX_2,TR-struc-ETS,12,1,6240.217023
4,458.710725,FOODS_3_712_WI_2,TR-struc-ETS,12,1,454.199753
...,...,...,...,...,...,...
415,5.771747,HOUSEHOLD_1_261_WI_2,TR-struc-ETS,1,12,2.652389
416,88.067902,HOUSEHOLD_1_379_TX_2,TR-struc-ETS,1,12,87.176401
417,17.626134,HOUSEHOLD_1_460_CA_1,TR-struc-ETS,1,12,6.897244
418,1.960321,HOUSEHOLD_2_290_CA_4,TR-struc-ETS,1,12,2.124701


In [17]:
# Example 2:
# Holdout set used
# Reconciliation method: in-sample mse
holdout = True
cv = 3

In [18]:
# Build a fresh THieF object on the bottom-level frequency
thief = THieF(bottom_level_freq=bottom_level_freq)

# Fit on the sampled series, using a holdout set and `cv` folds
thief.fit(sampled_df, holdout=holdout, format='pivoted', cv=cv)

# Base forecasts, plus the residuals stored on the object by predict
base_fc_2 = thief.predict(model)
res_2 = thief.base_forecast_residuals

# Reconcile the base forecasts with the 'mse' method
reconciled_2 = thief.reconcile(reconciliation_method='mse')

In [19]:
reconciled_2

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base,cv,date,cutoff,y_true
0,297.430718,FOODS_2_173_CA_1,TR-mse-ETS,12,1,255.000346,1,2015,2014,455
1,310.612782,FOODS_2_270_CA_1,TR-mse-ETS,12,1,361.512757,1,2015,2014,236
2,8.186486,FOODS_3_183_WI_3,TR-mse-ETS,12,1,24.997500,1,2015,2014,273
3,5821.129949,FOODS_3_694_TX_2,TR-mse-ETS,12,1,6135.862177,1,2015,2014,6609
4,357.884750,FOODS_3_712_WI_2,TR-mse-ETS,12,1,226.363500,1,2015,2014,364
...,...,...,...,...,...,...,...,...,...,...
1255,4.951486,HOUSEHOLD_1_261_WI_2,TR-mse-ETS,1,12,4.171289,3,2016-04,2015-04,1
1256,37.643246,HOUSEHOLD_1_379_TX_2,TR-mse-ETS,1,12,77.006958,3,2016-04,2015-04,88
1257,13.995357,HOUSEHOLD_1_460_CA_1,TR-mse-ETS,1,12,11.033668,3,2016-04,2015-04,34
1258,1.460765,HOUSEHOLD_2_290_CA_4,TR-mse-ETS,1,12,1.504382,3,2016-04,2015-04,3


##### Use the Temporal Reconciler

In [20]:
from DeepRetail.reconciliation.temporal import TemporalReconciler

In [21]:
# Define the TemporalReconciler
temporal_reconciler = TemporalReconciler(bottom_level_freq = bottom_level_freq)

In [22]:
# Example 1:
# Reconcile without holdout set
# Reconcile with structural scaling
holdout = False

In [23]:
# Fit the reconciler
temporal_reconciler.fit(base_fc_1)

In [24]:
reconciled = temporal_reconciler.reconcile('struc')
reconciled

Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base
0,434.355233,FOODS_2_173_CA_1,TR-struc-ETS,12,1,426.167627
1,236.655250,FOODS_2_270_CA_1,TR-struc-ETS,12,1,281.131529
2,309.659080,FOODS_3_183_WI_3,TR-struc-ETS,12,1,89.396315
3,6335.337239,FOODS_3_694_TX_2,TR-struc-ETS,12,1,6240.217023
4,458.710725,FOODS_3_712_WI_2,TR-struc-ETS,12,1,454.199753
...,...,...,...,...,...,...
415,5.771747,HOUSEHOLD_1_261_WI_2,TR-struc-ETS,1,12,2.652389
416,88.067902,HOUSEHOLD_1_379_TX_2,TR-struc-ETS,1,12,87.176401
417,17.626134,HOUSEHOLD_1_460_CA_1,TR-struc-ETS,1,12,6.897244
418,1.960321,HOUSEHOLD_2_290_CA_4,TR-struc-ETS,1,12,2.124701


In [25]:
# Example 2:
# Reconcile with a holdout set
# Reconcile using mse
holdout = True
cv = 3

In [26]:
base_fc_2

Unnamed: 0,temporal_level,unique_id,date,y,cutoff,Model,fh,cv,y_true
0,1,FOODS_2_173_CA_1,2015-03,29.990204,2015-02,ETS,1,1,18
1,1,FOODS_2_173_CA_1,2015-04,29.990204,2015-02,ETS,2,1,21
2,1,FOODS_2_173_CA_1,2015-05,29.990204,2015-02,ETS,3,1,34
3,1,FOODS_2_173_CA_1,2015-06,29.990204,2015-02,ETS,4,1,43
4,1,FOODS_2_173_CA_1,2015-07,29.990204,2015-02,ETS,5,1,34
...,...,...,...,...,...,...,...,...,...
1255,12,HOUSEHOLD_1_261_WI_2,2015,103.501536,2014,ETS,1,3,72
1256,12,HOUSEHOLD_1_379_TX_2,2015,305.999130,2014,ETS,1,3,1286
1257,12,HOUSEHOLD_1_460_CA_1,2015,199.870569,2014,ETS,1,3,260
1258,12,HOUSEHOLD_2_290_CA_4,2015,22.749990,2014,ETS,1,3,26


In [27]:
# Build a fresh TemporalReconciler on the bottom-level frequency
temporal_reconciler = TemporalReconciler(bottom_level_freq=bottom_level_freq)

# Fit the reconciler on the holdout base forecasts
temporal_reconciler.fit(base_fc_2, holdout=holdout, cv=cv)

# Reconcile with the 'mse' method, passing the base-forecast residuals
reconciled = temporal_reconciler.reconcile('mse', residual_df=res_2)
reconciled


Unnamed: 0,y,unique_id,Model,temporal_level,fh,y_base,cv,date,cutoff,y_true
0,297.430718,FOODS_2_173_CA_1,TR-mse-ETS,12,1,255.000346,1,2015,2014,455
1,310.612782,FOODS_2_270_CA_1,TR-mse-ETS,12,1,361.512757,1,2015,2014,236
2,8.186486,FOODS_3_183_WI_3,TR-mse-ETS,12,1,24.997500,1,2015,2014,273
3,5821.129949,FOODS_3_694_TX_2,TR-mse-ETS,12,1,6135.862177,1,2015,2014,6609
4,357.884750,FOODS_3_712_WI_2,TR-mse-ETS,12,1,226.363500,1,2015,2014,364
...,...,...,...,...,...,...,...,...,...,...
1255,4.951486,HOUSEHOLD_1_261_WI_2,TR-mse-ETS,1,12,4.171289,3,2016-04,2015-04,1
1256,37.643246,HOUSEHOLD_1_379_TX_2,TR-mse-ETS,1,12,77.006958,3,2016-04,2015-04,88
1257,13.995357,HOUSEHOLD_1_460_CA_1,TR-mse-ETS,1,12,11.033668,3,2016-04,2015-04,34
1258,1.460765,HOUSEHOLD_2_290_CA_4,TR-mse-ETS,1,12,1.504382,3,2016-04,2015-04,3
