In [1]:
import warnings

warnings.filterwarnings("ignore")

In [None]:
# !pip install metaforecast

# Data preparation

Loading a dataset and splitting it into training and testing

In [2]:
import pandas as pd

# loading dataset
df = pd.read_parquet('https://datasets-nixtla.s3.amazonaws.com/m4-hourly.parquet')

# setting the forecasting horizon and the input size
horizon = 12
n_lags = 24

df.head()

Unnamed: 0,unique_id,ds,y
0,H1,1,605.0
1,H1,2,586.0
2,H1,3,586.0
3,H1,4,559.0
4,H1,5,511.0


In [3]:
df_by_unq = df.groupby('unique_id')

train_l, test_l = [], []
for g, df_ in df_by_unq:
    df_ = df_.sort_values('ds')

    train_df_g = df_.head(-horizon)
    test_df_g = df_.tail(horizon)

    train_l.append(train_df_g)
    test_l.append(test_df_g)

train_df = pd.concat(train_l).reset_index(drop=True)
test_df = pd.concat(test_l).reset_index(drop=True)

train_df.tail()

Unnamed: 0,unique_id,ds,y
368399,H99,732,16400.0
368400,H99,733,18181.0
368401,H99,734,19684.0
368402,H99,735,21290.0
368403,H99,736,22578.0


In [4]:
test_df.head()

Unnamed: 0,unique_id,ds,y
0,H1,737,696.0
1,H1,738,761.0
2,H1,739,812.0
3,H1,740,836.0
4,H1,741,838.0


# Model setup


focus on neuralforecast

In [5]:
from neuralforecast import NeuralForecast
from neuralforecast.models import (NHITS,
                                   LSTM,
                                   GRU,
                                   NBEATS,
                                   MLP,
                                   TiDE)

CONFIG = {
    'input_size': n_lags,
    'h': horizon,
    'accelerator': 'cpu', 'max_steps':10,
}

models = [
    NBEATS(**CONFIG, stack_types=3*["identity"]),
    NHITS(**CONFIG),
    MLP( **CONFIG),
    MLP(num_layers=3, **CONFIG),
    #LSTM(**CONFIG),
    #GRU(**CONFIG),
    #TiDE( **CONFIG),
]


nf = NeuralForecast(models=models, freq=24)

Seed set to 1
Seed set to 1
Seed set to 1
Seed set to 1


In [6]:
n_windows = train_df['unique_id'].value_counts().min() - n_lags - 1

fcst_cv = nf.cross_validation(df=train_df, n_windows=n_windows, step_size=1)
fcst_cv = fcst_cv.reset_index()
fcst_cv = fcst_cv.groupby(['unique_id', 'cutoff']).head(1).drop(columns='cutoff')

fcst_cv.head()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
9.831     Total estimated model params size (MB)


Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  9.78it/s, v_num=28, train_loss_step=24.50]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 148.77it/s][A
Validation DataLoader 0:  15%|████████████████▉                                                                            

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:01<00:00,  8.59it/s, v_num=28, train_loss_step=24.50, train_loss_epoch=49.60]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:08<00:00,  1.50it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.751     Total estimated model params size (MB)



Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  7.87it/s, v_num=30, train_loss_step=23.40]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 136.57it/s][A
Validation DataLoader 0:  15%|████████████████▉                                                                           

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:01<00:00,  7.15it/s, v_num=30, train_loss_step=23.40, train_loss_epoch=48.70]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:09<00:00,  1.35it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 1.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.350     Total estimated model params size (MB)



Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:00<00:00, 20.85it/s, v_num=32, train_loss_step=33.60]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 169.15it/s][A
Validation DataLoader 0:  15%|████████████████▉                                                                           

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:00<00:00, 17.12it/s, v_num=32, train_loss_step=33.60, train_loss_epoch=80.00]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:03<00:00,  3.39it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 2.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.548     Total estimated model params size (MB)



Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  8.25it/s, v_num=34, train_loss_step=33.30]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 159.97it/s][A
Validation DataLoader 0:  15%|████████████████▉                                                                           

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:01<00:00,  7.57it/s, v_num=34, train_loss_step=33.30, train_loss_epoch=81.90]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:08<00:00,  1.52it/s]


Unnamed: 0,unique_id,ds,NBEATS,NHITS,MLP,MLP1,y
0,H1,15,491.808289,492.728455,508.756592,557.799622,493.0
12,H1,16,518.546997,523.763611,543.551514,574.712036,554.0
24,H1,17,576.509033,576.247559,528.935242,554.555176,610.0
36,H1,18,628.377075,614.479492,574.343994,554.490234,666.0
48,H1,19,680.637573,652.831848,608.28479,564.328979,715.0


# ensembling


In [7]:
from metaforecast.ensembles.mlpol import MLpol

In [10]:
ensemble = MLpol(loss_type='square', gradient=True, trim_ratio=0.8, weight_by_uid=False)

In [11]:
ensemble.fit(fcst_cv)

In [12]:
nf.fit(df=train_df)


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
9.831     Total estimated model params size (MB)


Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  6.65it/s, v_num=36, train_loss_step=128.0]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 114.50it/s][A
Validation DataLoader 0:  15%|█████████████████                                                                            

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:01<00:00,  5.51it/s, v_num=36, train_loss_step=128.0, train_loss_epoch=746.0]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.4 M  | train
-------------------------------------------------------
2.4 M     Trainable params
0         Non-trainable params
2.4 M     Total params
9.751     Total estimated model params size (MB)



Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  5.43it/s, v_num=37, train_loss_step=187.0]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 104.61it/s][A
Validation DataLoader 0:  15%|█████████████████                                                                           

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|███████████████████████████████████████████████████▌               | 10/13 [00:02<00:00,  4.86it/s, v_num=37, train_loss_step=187.0, train_loss_epoch=1.05e+3]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 1.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
1.1 M     Trainable params
0         Non-trainable params
1.1 M     Total params
4.350     Total estimated model params size (MB)


Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  7.76it/s, v_num=38, train_loss_step=96.40]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▍                                                                                                     | 1/13 [00:00<00:00, 132.14it/s][A
Validation DataLoader 0:  15%|█████████████████                                                                            

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|█████████████████████████████████████████████████████                | 10/13 [00:01<00:00,  6.86it/s, v_num=38, train_loss_step=96.40, train_loss_epoch=732.0]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | mlp          | ModuleList    | 2.1 M  | train
4 | out          | Linear        | 12.3 K | train
-------------------------------------------------------
2.1 M     Trainable params
0         Non-trainable params
2.1 M     Total params
8.548     Total estimated model params size (MB)



Epoch 0:  77%|███████████████████████████████████████████████████████████████████████▌                     | 10/13 [00:01<00:00,  5.61it/s, v_num=39, train_loss_step=161.0]
Validation: |                                                                                                                                         | 0/? [00:00<?, ?it/s][A
Validation:   0%|                                                                                                                                    | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   0%|                                                                                                                       | 0/13 [00:00<?, ?it/s][A
Validation DataLoader 0:   8%|████████▌                                                                                                      | 1/13 [00:00<00:00, 98.30it/s][A
Validation DataLoader 0:  15%|█████████████████                                                                           

`Trainer.fit` stopped: `max_steps=10` reached.


Epoch 0:  77%|███████████████████████████████████████████████████▌               | 10/13 [00:02<00:00,  4.74it/s, v_num=39, train_loss_step=161.0, train_loss_epoch=1.57e+3]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 28.75it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 57.50it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 96.01it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 55.55it/s]


In [19]:
fcst = nf.predict(step_size=1)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 100.37it/s]

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs



Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 152.69it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 200.33it/s]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 13/13 [00:00<00:00, 170.76it/s]


In [21]:
fcst.query('unique_id=="H1"')

Unnamed: 0_level_0,ds,NBEATS,NHITS,MLP,MLP1
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
H1,760,656.379639,637.912354,803.190735,512.01123
H1,784,721.976929,653.890076,820.819336,517.277283
H1,808,729.610474,690.290955,784.360291,546.837646
H1,832,783.844788,696.610229,946.140686,524.023438
H1,856,800.086487,686.995361,836.007324,503.833008
H1,880,781.88031,697.947449,743.06897,518.366272
H1,904,683.392822,699.303101,711.253967,490.032227
H1,928,715.381653,674.295349,607.173096,509.725342
H1,952,688.444824,651.708557,763.695007,517.78479
H1,976,670.183411,651.479431,789.079224,472.931213


In [14]:
fcst_ensemble = ensemble.predict(fcst.reset_index())
fcst_ensemble

0         641.353299
1         666.576463
2         697.617224
3         712.864342
4         708.067239
            ...     
4963    23466.613806
4964    22686.546856
4965    22534.340592
4966    22431.354131
4967    22057.653137
Name: Mixture, Length: 4968, dtype: float64

In [15]:
fcst[ensemble.alias] = fcst_ensemble.values

fcst.head()

Unnamed: 0_level_0,ds,NBEATS,NHITS,MLP,MLP1,Mixture
unique_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
H1,760,656.379639,637.912354,803.190735,512.01123,641.353299
H1,784,721.976929,653.890076,820.819336,517.277283,666.576463
H1,808,729.610474,690.290955,784.360291,546.837646,697.617224
H1,832,783.844788,696.610229,946.140686,524.023438,712.864342
H1,856,800.086487,686.995361,836.007324,503.833008,708.067239


In [18]:
test_df.merge(fcst, on=['unique_id','ds'], how="left")

Unnamed: 0,unique_id,ds,y,NBEATS,NHITS,MLP,MLP1,Mixture
0,H1,737,696.0,,,,,
1,H1,738,761.0,,,,,
2,H1,739,812.0,,,,,
3,H1,740,836.0,,,,,
4,H1,741,838.0,,,,,
...,...,...,...,...,...,...,...,...
4963,H99,744,24039.0,,,,,
4964,H99,745,22946.0,,,,,
4965,H99,746,22217.0,,,,,
4966,H99,747,21416.0,,,,,


In [None]:
test_df