# Benchmarks Results

In [1]:
import os
os.chdir("..")

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from neuralforecast import NeuralForecast
from neuralforecast.losses.numpy import smape
from datasetsforecast.m3 import M3    
from neuralforecast.losses.pytorch import SMAPE
import hydra
from omegaconf import DictConfig


## models
from neuralforecast.auto import AutoDeepAR, AutoNBEATS, AutoNHITS, AutoTFT, AutoTCN, AutoRNN, AutoLSTM
from neuralforecast.models import DeepAR, NBEATS, NHITS, TFT, TCN, RNN, LSTM

  from .autonotebook import tqdm as notebook_tqdm
2025-03-03 16:23:47,349	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-03-03 16:23:47,635	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


In [3]:
import run_exp as utils


def run_all_models():
    hydra.initialize(config_path="./../conf")
    cfg = hydra.compose(config_name="config.yaml")
    
    results, forecasts = utils.run_exp(cfg)
    results.to_csv("results.csv", index=False)

    return results, forecasts
results, forecasts = run_all_models()


==== Running experiment for dataset: m4_monthly ====
Loading m4_monthly dataset...


: 

In [6]:
print(forecasts)

      unique_id         ds       NBEATS
0            M1 1994-03-31  2400.114746
1            M1 1994-04-30  2398.695068
2            M1 1994-05-31  1880.209717
3            M1 1994-06-30  1930.788940
4            M1 1994-07-31  2197.697754
...         ...        ...          ...
25699      M999 1993-10-31  5536.040039
25700      M999 1993-11-30  5562.914551
25701      M999 1993-12-31  5567.224121
25702      M999 1994-01-31  5593.346680
25703      M999 1994-02-28  5616.052246

[25704 rows x 3 columns]


In [7]:
dic: DictConfig = DictConfig({
    'directory': './data/m3',
    'group': 'Monthly',
})


Y_all = utils.load_dataset('m3_monthly', dic)
y_truth = Y_all.groupby('unique_id').tail(18)
y_truth

Loading m3_monthly dataset...


Unnamed: 0,unique_id,ds,y
50,M1,1994-03-31,2280.0
51,M1,1994-04-30,480.0
52,M1,1994-05-31,5040.0
53,M1,1994-06-30,1920.0
54,M1,1994-07-31,840.0
...,...,...,...
167557,M999,1993-10-31,5225.9
167558,M999,1993-11-30,5236.3
167559,M999,1993-12-31,5186.6
167560,M999,1994-01-31,5143.4


In [8]:
joined = pd.merge(forecasts, y_truth, on='unique_id')
joined

Unnamed: 0,unique_id,ds_x,NBEATS,ds_y,y
0,M1,1994-03-31,2400.114746,1994-03-31,2280.0
1,M1,1994-03-31,2400.114746,1994-04-30,480.0
2,M1,1994-03-31,2400.114746,1994-05-31,5040.0
3,M1,1994-03-31,2400.114746,1994-06-30,1920.0
4,M1,1994-03-31,2400.114746,1994-07-31,840.0
...,...,...,...,...,...
462667,M999,1994-02-28,5616.052246,1993-10-31,5225.9
462668,M999,1994-02-28,5616.052246,1993-11-30,5236.3
462669,M999,1994-02-28,5616.052246,1993-12-31,5186.6
462670,M999,1994-02-28,5616.052246,1994-01-31,5143.4


In [9]:
## check for inf or nans and print rows
print(joined[joined['NBEATS'].isnull()])
print(joined[joined['NBEATS'].isna()])
print(joined[joined['NBEATS'].isin([np.nan, np.inf, -np.inf])])

Empty DataFrame
Columns: [unique_id, ds_x, NBEATS, ds_y, y]
Index: []
Empty DataFrame
Columns: [unique_id, ds_x, NBEATS, ds_y, y]
Index: []
Empty DataFrame
Columns: [unique_id, ds_x, NBEATS, ds_y, y]
Index: []


In [10]:
group_smape = joined.groupby('unique_id').apply(
    lambda group: smape(group['y'], group['NBEATS'])
)

# Map the computed SMAPE back to each row using the unique_id
joined['smape'] = joined['unique_id'].map(group_smape)
joined

Unnamed: 0,unique_id,ds_x,NBEATS,ds_y,y,smape
0,M1,1994-03-31,2400.114746,1994-03-31,2280.0,0.577843
1,M1,1994-03-31,2400.114746,1994-04-30,480.0,0.577843
2,M1,1994-03-31,2400.114746,1994-05-31,5040.0,0.577843
3,M1,1994-03-31,2400.114746,1994-06-30,1920.0,0.577843
4,M1,1994-03-31,2400.114746,1994-07-31,840.0,0.577843
...,...,...,...,...,...,...
462667,M999,1994-02-28,5616.052246,1993-10-31,5225.9,0.039516
462668,M999,1994-02-28,5616.052246,1993-11-30,5236.3,0.039516
462669,M999,1994-02-28,5616.052246,1993-12-31,5186.6,0.039516
462670,M999,1994-02-28,5616.052246,1994-01-31,5143.4,0.039516


In [11]:
# plot from the worst to the best
joined = joined.sort_values('smape', ascending=False)
joined


Unnamed: 0,unique_id,ds_x,NBEATS,ds_y,y,smape
313308,M584,1987-07-31,683.260315,1987-07-31,1025.0,1.311769
313429,M584,1988-01-31,606.627136,1988-08-31,17275.0,1.311769
313411,M584,1987-12-31,302.997192,1988-08-31,17275.0,1.311769
313412,M584,1987-12-31,302.997192,1988-09-30,23375.0,1.311769
313413,M584,1987-12-31,302.997192,1988-10-31,8575.0,1.311769
...,...,...,...,...,...,...
397439,M817,1993-08-31,7023.106934,1994-02-28,6974.0,0.009417
397440,M817,1993-09-30,7042.001465,1992-09-30,6896.0,0.009417
397441,M817,1993-09-30,7042.001465,1992-10-31,6922.0,0.009417
397442,M817,1993-09-30,7042.001465,1992-11-30,6948.0,0.009417


In [None]:
from ray import tune

dic: DictConfig = DictConfig({
    'directory': './data/m3',
    'group': 'Monthly',
})

Y_all = utils.load_dataset('m3_monthly', dic)

# horizons: list = [i for i in range(1, 2)]
horizons: list = [12, 18, 24]
results = pd.DataFrame(columns=['model', 'horizon', 'smape'])

models = [
    # AutoDeepAR,
    # AutoNBEATS,
    AutoNHITS,
    # AutoTFT,
    # AutoTCN,
    # AutoRNN,
    # AutoLSTM
    # DeepAR,
    # NBEATS,
    # NHITS,
    # TFT,
    # TCN,
    # RNN,
    # LSTM
]


for modelCls in models:
    for horizon in horizons:

        
        print(f"Running {modelCls.__name__} with horizon {horizon}")


        # model = modelCls(
        #     h=horizon,
        #     input_size=24,
        #     loss=SMAPE(),
        #     early_stopping_patience=10,
        #     batch_size=32,
        #     valid_loss=SMAPE(),
        # )
        model = modelCls(
            num_samples = 5,
            loss=SMAPE(),
            h=horizon,
        )

        nf = NeuralForecast(
            models=[model],
            freq='M',
        )
        
        Y_hat = nf.cross_validation(df=Y_all, val_size=horizon)
        results = pd.concat([results, pd.DataFrame({
            'model': [model.__class__.__name__],
            'horizon': [horizon],
            'smape': [utils.calculate_smape(Y_hat, Y_hat, model.__class__.__name__)]
        })], ignore_index=True)

2025-02-12 00:20:29,769	ERROR tune_controller.py:1331 -- Trial task failed for trial _train_tune_23c8a_00000
Traceback (most recent call last):
  File "c:\Users\ricar\mixture_of_experts_time_series\venv\Lib\site-packages\ray\air\execution\_internal\event_manager.py", line 110, in resolve_future
    result = ray.get(future)
             ^^^^^^^^^^^^^^^
  File "c:\Users\ricar\mixture_of_experts_time_series\venv\Lib\site-packages\ray\_private\auto_init_hook.py", line 21, in auto_init_wrapper
    return fn(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ricar\mixture_of_experts_time_series\venv\Lib\site-packages\ray\_private\client_mode_hook.py", line 103, in wrapper
    return func(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\ricar\mixture_of_experts_time_series\venv\Lib\site-packages\ray\_private\worker.py", line 2772, in get
    values, debugger_breakpoint = worker.get_objects(object_refs, timeout=timeout)
                                  ^^^^^^^^

RuntimeError: No best trial found for the given metric: loss. This means that no trial has reported this metric, or all values reported for this metric are NaN. To not ignore NaN values, you can set the `filter_nan_and_inf` arg to False.

In [None]:
results

model,nbeats,nhits,simpleMoe
horizon,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
12,0.124858,0.126106,0.138079
18,0.13921,0.142841,0.159051
24,0.147199,0.148069,0.161115


In [None]:
# show results in a graph

results = results.pivot(index='horizon', columns='model', values='smape')
results.plot()
plt.title('M3 Monthly')
plt.ylabel('SMAPE')
plt.xlabel('Horizon')
plt.show()


KeyError: 'horizon'

In [None]:
# for each series in the dataset give the number of observations and the number of missing values
Y_all = utils.load_dataset('m3_monthly', dic)

missing = Y_all.groupby('unique_id')['y'].apply(lambda x: x.isna().sum()).reset_index(name='missing')
total = Y_all.groupby('unique_id')['y'].count().reset_index(name='total')
missing = pd.merge(missing, total, on='unique_id')
missing['missing_ratio'] = missing['missing'] / missing['total']
missing


Loading m3_monthly dataset...


  freq = pd.tseries.frequencies.to_offset(class_group.freq)


Unnamed: 0,unique_id,missing,total,missing_ratio
0,M1,0,68,0.0
1,M10,0,68,0.0
2,M100,0,69,0.0
3,M1000,0,134,0.0
4,M1001,0,134,0.0
...,...,...,...,...
1423,M995,0,134,0.0
1424,M996,0,134,0.0
1425,M997,0,134,0.0
1426,M998,0,134,0.0


In [None]:
# sort by total number of total values ascending
missing.sort_values(ascending=True, by='total').head(10)

Unnamed: 0,unique_id,missing,total,missing_ratio
89,M1079,0,66,0.0
88,M1078,0,66,0.0
0,M1,0,68,0.0
1,M10,0,68,0.0
334,M13,0,68,0.0
1317,M9,0,68,0.0
445,M14,0,68,0.0
984,M6,0,68,0.0
496,M16,0,68,0.0
285,M1255,0,68,0.0
