In [1]:
import sys
MAINPATH = ".."  # nopep8
sys.path.append(MAINPATH)  # nopep8
from src.models.model import NBeatsNet
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from torch import optim
from torch.nn import functional as F
from tqdm import tqdm

In [2]:
# plot utils.
def plot_scatter(*args, **kwargs):
    plt.plot(*args, **kwargs)
    plt.scatter(*args, **kwargs)


# simple batcher.
def data_generator(x, y, size):
    assert len(x) == len(y)
    batches = []
    for ii in range(0, len(x), size):
        batches.append((x[ii:ii + size], y[ii:ii + size]))
    for batch in batches:
        yield batch

In [3]:
df = pd.read_parquet("/Users/utkarshpratiush/Cr_D/modeling/data/master_dataset_merged_2023021518.parquet")
df = df[['date', 'target_w']]
series_std = df.groupby('date').apply(pd.DataFrame.std, numeric_only=True)

In [4]:
series_std

Unnamed: 0_level_0,target_w
date,Unnamed: 1_level_1
2016-02-17,0.040185
2016-02-24,0.047002
2016-03-02,0.036563
2016-03-09,0.031037
2016-03-16,0.031536
...,...
2022-12-07,0.030201
2022-12-14,0.032864
2022-12-21,0.021859
2022-12-28,0.025964


In [5]:
forecast_length = 1
backcast_length = 52 * forecast_length
batch_size = 128  # greater than 4 for viz

#print(series.head())
series = series_std.values.flatten()  # just keep np array here for simplicity.

# data backcast/forecast generation.
x, y = [], []
for epoch in range(backcast_length, len(series) - forecast_length):
    x.append(series[epoch - backcast_length:epoch])
    y.append(series[epoch:epoch + forecast_length])
x = np.array(x)
y = np.array(y)


# split train/test.
c = int(len(x) * 0.8)
x_train, y_train = x[:c], y[:c]
x_test, y_test = x[c:], y[c:]

# normalization.
norm_constant = np.max(x_train)
x_train, y_train = x_train / norm_constant, y_train / norm_constant
x_test, y_test = x_test / norm_constant, y_test / norm_constant



In [6]:
x.shape, y.shape

((307, 52), (307, 1))

In [7]:
# model
net = NBeatsNet(
    stack_types=(NBeatsNet.GENERIC_BLOCK, NBeatsNet.GENERIC_BLOCK),
    forecast_length=forecast_length,
    backcast_length=backcast_length,
    hidden_layer_units=128,
)
optimiser = optim.Adam(lr=1e-4, params=net.parameters())

| N-Beats
| --  Stack Generic (#0) (share_weights_in_stack=False)
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=52, forecast_length=1, share_thetas=False) at @4947302672
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=52, forecast_length=1, share_thetas=False) at @4865719264
     | -- GenericBlock(units=128, thetas_dim=4, backcast_length=52, forecast_length=1, share_thetas=False) at @4940352384
| --  Stack Generic (#1) (share_weights_in_stack=False)
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=52, forecast_length=1, share_thetas=False) at @4947302576
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=52, forecast_length=1, share_thetas=False) at @4947301376
     | -- GenericBlock(units=128, thetas_dim=8, backcast_length=52, forecast_length=1, share_thetas=False) at @4947302720


In [8]:

grad_step = 0
for epoch in tqdm(range(1000)):
    # train.
    net.train()
    train_loss = []
    for x_train_batch, y_train_batch in data_generator(x_train, y_train, batch_size):
        grad_step += 1
        optimiser.zero_grad()
        _, forecast = net(torch.tensor(x_train_batch, dtype=torch.float).to(net.device))
        loss = F.mse_loss(forecast, torch.tensor(y_train_batch, dtype=torch.float).to(net.device))
        train_loss.append(loss.item())
        loss.backward()
        optimiser.step()
    train_loss = np.mean(train_loss)

    # test.
    net.eval()
    _, forecast = net(torch.tensor(x_test, dtype=torch.float))
    test_loss = F.mse_loss(forecast, torch.tensor(y_test, dtype=torch.float)).item()
    p = forecast.detach().numpy()
    if epoch % 100 == 0:
        #with torch.no_grad():
        #    save(net, optimiser, grad_step)
        print(f'epoch = {str(epoch).zfill(4)}, '
                f'grad_step = {str(grad_step).zfill(6)}, '
                f'tr_loss (epoch) = {1000 * train_loss:.3f}, '
                f'te_loss (epoch) = {1000 * test_loss:.3f}')




  2%|▏         | 16/1000 [00:00<00:12, 81.55it/s]

epoch = 0000, grad_step = 000002, tr_loss (epoch) = 209.695, te_loss (epoch) = 178.935


 11%|█         | 111/1000 [00:01<00:08, 101.08it/s]

epoch = 0100, grad_step = 000202, tr_loss (epoch) = 7.299, te_loss (epoch) = 3.457


 22%|██▏       | 219/1000 [00:02<00:07, 104.41it/s]

epoch = 0200, grad_step = 000402, tr_loss (epoch) = 3.563, te_loss (epoch) = 2.143


 32%|███▏      | 318/1000 [00:03<00:06, 105.11it/s]

epoch = 0300, grad_step = 000602, tr_loss (epoch) = 1.380, te_loss (epoch) = 2.227


 42%|████▏     | 417/1000 [00:04<00:05, 105.87it/s]

epoch = 0400, grad_step = 000802, tr_loss (epoch) = 0.388, te_loss (epoch) = 2.448


 52%|█████▏    | 516/1000 [00:05<00:04, 105.78it/s]

epoch = 0500, grad_step = 001002, tr_loss (epoch) = 0.247, te_loss (epoch) = 2.551


 62%|██████▏   | 615/1000 [00:06<00:03, 100.96it/s]

epoch = 0600, grad_step = 001202, tr_loss (epoch) = 0.140, te_loss (epoch) = 2.601


 71%|███████▏  | 714/1000 [00:06<00:02, 101.65it/s]

epoch = 0700, grad_step = 001402, tr_loss (epoch) = 0.095, te_loss (epoch) = 2.750


 81%|████████▏ | 813/1000 [00:07<00:01, 99.87it/s] 

epoch = 0800, grad_step = 001602, tr_loss (epoch) = 0.062, te_loss (epoch) = 2.778


 91%|█████████ | 912/1000 [00:08<00:00, 105.56it/s]

epoch = 0900, grad_step = 001802, tr_loss (epoch) = 0.038, te_loss (epoch) = 2.846


100%|██████████| 1000/1000 [00:09<00:00, 102.14it/s]
