# Informer Demo

In [None]:
import sys

# if not 'Informer2020' in sys.path:
#     sys.path += ['Informer2020']

## Experiments: Train and Test

In [None]:
from utils.tools import dotdict
from exp.exp_informer import Exp_Informer
import torch
import numpy as np
import pandas as pd
import os
from utils.ipynb_helpers import args_from_setting, setting_from_args, handle_gpu

In [None]:
# Experiment configuration. Every field is set on a single `dotdict`, which is
# later handed to the experiment class (`Exp(args)`) and to the dataset
# constructors in the cells further down.
args = dotdict()
args.des = "full_1h"

args.model = "informer"  # model of experiment, options: [informer, informerstack, informerlight(TBD)]

args.data = "custom"  # data
args.root_path = "./data/stock/"  # root path of data file


args.data_path = "full_1h.csv"  # data file
args.features = "MS"  # forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate
args.target = "XOM_pctchange"  # target feature in S or MS task
args.freq = "h"  # freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h
args.checkpoints = "./checkpoints"  # location of model checkpoints

args.seq_len = 16  # input sequence length of Informer encoder
args.label_len = 4  # start token length of Informer decoder
args.pred_len = 1  # prediction sequence length
# Informer decoder input: concat[start token series(label_len), zero padding series(pred_len)]

# Input columns; the list has 7 entries, matching enc_in / dec_in below.
args.cols = [
    "XOM_open",
    "XOM_high",
    "XOM_low",
    "XOM_close",
    "XOM_volume",
    "XOM_pctchange",
    "XOM_shortsma",
]  # ["XOM_close", "BP_close", "CVX_close", "WTI_close"]
args.enc_in = 7  # 13 # encoder input size
args.dec_in = 7  # 13 # decoder input size
args.c_out = 1  # output size
args.factor = 5  # probsparse attn factor
args.d_model = 64  # 512 # dimension of model
args.n_heads = 8  # num of heads
args.e_layers = 4  # 2 # num of encoder layers
args.d_layers = 2  # 1 # num of decoder layers
args.d_ff = 2048  # dimension of fcn in model
args.dropout = 0.05  # dropout
args.attn = "prob"  # attention used in encoder, options:[prob, full]
args.t_embed = "timeF"  # time features encoding, options:[timeF, fixed, learned]
args.activation = "gelu"  # activation
args.distil = True  # whether to use distilling in encoder
args.output_attention = False  # whether to output attention in encoder
args.mix = True
args.padding = 0

args.batch_size = 64
args.learning_rate = 0.00001
args.loss = "mse"
args.lradj = "type1"
args.use_amp = False  # whether to use automatic mixed precision training

args.num_workers = 0
args.itr = 1  # number of runs
args.max_epochs = 15
args.patience = 3


args.scale = True  # presumably toggles input scaling in the data loader — TODO confirm
args.inverse = True  # default is False, but @Zac thinks it should be True


# Date filters; presumably interpreted by the custom data loader — TODO confirm.
args.date_start = None  # "2021-01-01"
args.date_end = None
args.date_test = "2022-04-01"  # None

# Opaque helper from utils.ipynb_helpers; presumably fills in the GPU-related
# fields on args — TODO confirm.
handle_gpu(args, None)

# Keep the full frequency string (e.g. "15min" or "3h") in detail_freq, which
# the Dataset_Pred cell later passes as `freq`, and reduce args.freq to its
# last character only.
args.detail_freq = args.freq
args.freq = args.freq[-1:]

print("Args in experiment:")
print(args)
# Alias used by the cells below to construct experiments.
Exp = Exp_Informer

### Train & Test *args.itr* models

In [None]:
# Keep the most recent experiment and its setting name at notebook scope so
# that the cells further down can reuse them after the loop finishes.
exp, setting = None, None
for ii in range(args.itr):
    # Name of this run, then a fresh experiment built from the shared args.
    setting = setting_from_args(args, ii)
    exp = Exp(args)

    # Training phase.
    print(f">>>>>>>start training : {setting}>>>>>>>>>>>>>>>>>>>>>>>>>>")
    exp.train(setting)

    # Evaluation phase.
    print(f">>>>>>>testing : {setting}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<")
    exp.test(setting)

    # Release cached CUDA memory before the next run starts.
    torch.cuda.empty_cache()

## Prediction

In [None]:
# If you already have a trained model, you can set the arguments and model path, then initialize an Experiment and use it to predict
# Prediction is a sequence which is adjacent to the last date of the data, and does not exist in the data
# If you want to get more information about prediction, you can refer to code `exp/exp_informer.py function predict()` and `data/data_loader.py class Dataset_Pred`

# args = dotdict(model='informer', data='WTH', root_path='./data/ETT/', data_path='WTH.csv', features='M', target='WetBulbCelsius', freq='h', checkpoints='./checkpoints/', seq_len=96, label_len=48, pred_len=24, enc_in=12, dec_in=12, c_out=12, d_model=512, n_heads=8, e_layers=2, d_layers=1, s_layers=[3, 2, 1], d_ff=2048, factor=5, padding=0, distil=True, dropout=0.05, attn='prob', t_embed='timeF', activation='gelu', output_attention=False, do_predict=False, mix=True, cols=None, num_workers=0, itr=2, max_epochs=6, batch_size=32, patience=3, learning_rate=0.0001, des='test', loss='mse', lradj='type1', use_amp=False, inverse=False, use_gpu=True, gpu=0, use_multi_gpu=False, devices='0,1,2,3', detail_freq='h')

# Flip to True to predict from a previously trained run instead of the one
# produced by the training loop in this notebook.
manual = False

if manual:
    # Setting name of the stored run; args are rebuilt from it by the
    # args_from_setting helper and a new experiment is created from them.
    setting = "informer_custom_ftMS_sl256_ll64_pl16_ei1_di1_co1_iFalse_dm512_nh8_el2_dl1_df2048_atprob_fc5_ebtimeF_dtTrue_mxTrue_exp_0"
    args = args_from_setting(setting, args)

    exp = Exp(args)

# NOTE(review): `path` is computed here but not used anywhere in this cell.
path = os.path.join(args.checkpoints, setting, "checkpoint.pth")

# Second positional argument is presumably the `load` flag (it is in the
# notebook-local predict() reimplementation) — TODO confirm in exp_informer.
exp.predict(setting, True)

In [None]:
# The prediction is saved in ./results/{setting}/real_prediction.npy;
# load it back and display its shape.

prediction = np.load(f"./results/{setting}/real_prediction.npy")

prediction.shape

### More details about Prediction - prediction function

In [None]:
# here is the detailed code of function predict


def predict(exp, setting, load=False):
    """Run the experiment's model over its prediction loader and save the result.

    Args:
        exp: Experiment object. This function uses ``exp._get_data``,
            ``exp.args``, ``exp.model`` and ``exp.device``.
        setting: Run name; selects the sub-folder under ``exp.args.checkpoints``
            (when loading) and under ``./results`` (when saving).
        load: When true, load ``checkpoint.pth`` for ``setting`` into
            ``exp.model`` before predicting.

    Returns:
        numpy.ndarray: The model outputs of all batches joined along the first
        axis. It is also written to ``./results/<setting>/real_prediction.npy``.

    Raises:
        ValueError: If the prediction loader yields no batches.
    """
    _, pred_loader = exp._get_data(flag="pred")

    if load:
        best_model_path = os.path.join(exp.args.checkpoints, setting, "checkpoint.pth")
        # map_location lets a checkpoint saved on one device (e.g. a GPU) be
        # restored on whatever device this experiment is running on.
        exp.model.load_state_dict(torch.load(best_model_path, map_location=exp.device))

    exp.model.eval()

    # The decoder placeholder is filled with ones only when padding == 1;
    # every other value (including 0) uses zeros.
    make_padding = torch.ones if exp.args.padding == 1 else torch.zeros

    preds = []

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        for batch_x, batch_y, batch_x_mark, batch_y_mark in pred_loader:
            batch_x = batch_x.float().to(exp.device)
            batch_y = batch_y.float()
            batch_x_mark = batch_x_mark.float().to(exp.device)
            batch_y_mark = batch_y_mark.float().to(exp.device)

            # decoder input: the first label_len steps of batch_y followed by
            # pred_len placeholder steps
            placeholder = make_padding(
                [batch_y.shape[0], exp.args.pred_len, batch_y.shape[-1]]
            ).float()
            dec_inp = (
                torch.cat([batch_y[:, : exp.args.label_len, :], placeholder], dim=1)
                .float()
                .to(exp.device)
            )

            # encoder - decoder
            if exp.args.use_amp:
                with torch.cuda.amp.autocast():
                    outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            else:
                outputs = exp.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
            if exp.args.output_attention:
                # The model returns (outputs, attention); keep the outputs.
                outputs = outputs[0]

            pred = outputs.detach().cpu().numpy()
            # Flatten any leading axes per batch so batches of different sizes
            # (e.g. a shorter final batch) can still be joined below.
            preds.append(pred.reshape(-1, pred.shape[-2], pred.shape[-1]))

    if not preds:
        raise ValueError("prediction loader yielded no batches")

    preds = np.concatenate(preds, axis=0)

    # result save
    folder_path = os.path.join("./results", setting)
    os.makedirs(folder_path, exist_ok=True)

    np.save(os.path.join(folder_path, "real_prediction.npy"), preds)

    return preds

In [None]:
# You can also call the notebook-local predict() directly to get the result;
# passing True as `load` reloads the checkpoint for `setting` first.
prediction = predict(exp, setting, True)

In [None]:
import matplotlib.pyplot as plt

# Plot the last channel of the first predicted sample.
plt.figure()
plt.plot(prediction[0, :, -1])
plt.show()

### More details about Prediction - prediction dataset

You can give the `root_path` and `data_path` of the data you want to forecast, and set `seq_len`, `label_len`, `pred_len` and the other arguments just as for the other datasets. The difference is that you can set a more detailed freq such as `15min` or `3h` to generate the timestamps of the prediction series.

`Dataset_Pred` only has one sample (including `encoder_input: [1, seq_len, dim]`, `decoder_token: [1, label_len, dim]`, `encoder_input_timestamp: [1, seq_len, date_dim]`, `decoder_input_timestamp: [1, label_len+pred_len, date_dim]`). It takes the last `seq_len` rows of the given data as input and forecasts the unseen future sequence (`pred_len` steps).

In [1]:
from data_provider.data_loader import Dataset_Pred
from torch.utils.data import DataLoader

In [None]:
# Build the prediction dataset and a loader over it by hand.
Data = Dataset_Pred
flag = "pred"
batch_size = 1
shuffle_flag = False
drop_last = False

# Time-feature encoding switch: 1 only for the "timeF" embedding, else 0.
timeenc = 1 if args.t_embed == "timeF" else 0

# The prediction dataset is given the detailed frequency string.
freq = args.detail_freq

data_set = Data(args, flag=flag, freq=freq, timeenc=timeenc)

data_loader = DataLoader(
    data_set,
    drop_last=drop_last,
    num_workers=args.num_workers,
    shuffle=shuffle_flag,
    batch_size=batch_size,
)

In [None]:
# Number of samples in the dataset and number of batches in the loader.
len(data_set), len(data_loader)

## Visualization

In [None]:
# Once exp.train(setting) and exp.test(setting) have finished, we have a trained model and the results of the test experiment.
# The test results are saved in ./results/{setting}/pred.npy (prediction of test dataset) and ./results/{setting}/true.npy (ground truth of test dataset)

preds = np.load(f"./results/{setting}/pred.npy")
trues = np.load(f"./results/{setting}/true.npy")

# [samples, pred_len, dimensions]
preds.shape, trues.shape

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Plot ground truth vs. prediction for the last channel of the first test
# sample. (The upstream ETT demo called this channel "OT"; that name does not
# apply to the stock data configured in this notebook.)
plt.figure()
plt.plot(trues[0, :, -1], label="GroundTruth")
plt.plot(preds[0, :, -1], label="Prediction")
plt.legend()
plt.show()

In [None]:
print(trues.shape)
print(preds.shape)

# Error of the model: ground truth compared with the predictions.
MSE = np.square(trues - preds).mean()
RMSE = np.sqrt(MSE)

print("against preds", MSE, RMSE)


# Mean squared magnitude of the predictions themselves (preds vs. zeros).
# NOTE(review): if this was meant as an "always predict 0" baseline, the
# comparison should use `trues` rather than `preds` — confirm the intent.
MSE = np.square(preds - np.zeros(preds.shape)).mean()
RMSE = np.sqrt(MSE)
print("against 0s", MSE, RMSE)

In [None]:
# Plot ground truth vs. prediction for the first channel of the first test
# sample. (The upstream ETT demo called this channel "HUFL".)
# NOTE(review): with args.c_out = 1 this is presumably the same series as the
# last-channel plot — confirm against the saved array shapes.
plt.figure()
plt.plot(trues[0, :, 0], label="GroundTruth")
plt.plot(preds[0, :, 0], label="Prediction")
plt.legend()
plt.show()

In [None]:
from data_provider.data_loader import Dataset_Custom
from torch.utils.data import DataLoader

# Build the test split of the custom dataset and a loader over it.
# `freq` is not set here; it is the value assigned in the Dataset_Pred cell.
Data = Dataset_Custom
flag = "test"
batch_size = 1
shuffle_flag = False
drop_last = True

# Time-feature encoding switch: 1 only for the "timeF" embedding, else 0.
timeenc = 1 if args.t_embed == "timeF" else 0
data_set = Data(args, flag=flag, freq=freq, timeenc=timeenc)

data_loader = DataLoader(
    data_set,
    drop_last=drop_last,
    num_workers=args.num_workers,
    shuffle=shuffle_flag,
    batch_size=batch_size,
)

In [None]:
import os

# Rebuild the experiment with attention output switched on so that the model
# returns its attention maps alongside the forecasts.
args.output_attention = True

exp = Exp(args)

model = exp.model

path = os.path.join(args.checkpoints, setting, "checkpoint.pth")

# map_location lets a checkpoint saved on one device (e.g. a GPU) be restored
# on whatever device this experiment is running on.
print(model.load_state_dict(torch.load(path, map_location=exp.device)))

# Peek at the configured input columns of the raw data file.
df = pd.read_csv(os.path.join(args.root_path, args.data_path))
df[args.cols].head()

### Attention Visualization

In [None]:
# Index of the test batch whose attention maps will be visualized.
idx = 0

# Inference only: disable dropout and skip autograd bookkeeping so the
# attention maps are deterministic and cheap to compute.
model.eval()
with torch.no_grad():
    for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(data_loader):
        if i != idx:
            continue
        batch_x = batch_x.float().to(exp.device)
        batch_y = batch_y.float()

        batch_x_mark = batch_x_mark.float().to(exp.device)
        batch_y_mark = batch_y_mark.float().to(exp.device)

        # Decoder input: the first label_len steps of batch_y followed by
        # zeros for the pred_len steps to be forecast.
        dec_inp = torch.zeros_like(batch_y[:, -args.pred_len :, :]).float()
        dec_inp = (
            torch.cat([batch_y[:, : args.label_len, :], dec_inp], dim=1)
            .float()
            .to(exp.device)
        )

        outputs, attn = model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # The requested batch has been processed; do not walk the rest of
        # the loader.
        break

In [None]:
# Shapes of the first two attention maps returned by the model.
attn[0].shape, attn[1].shape  # , attn[2].shape

In [None]:
# One heatmap per attention head, for each of the selected attention maps.
distil = "Distil" if args.distil else "NoDistil"
layers = [0, 1]
for layer in layers:
    print("\n\n==========================")
    print("Showing attention layer", layer)
    print("==========================\n\n")
    for h in range(args.n_heads):
        plt.figure(figsize=[10, 8])
        plt.title(f"Informer, {distil}, attn:{args.attn} layer:{layer} head:{h}")
        # Attention of head h for the first sample in the batch, as numpy.
        A = attn[layer][0, h].detach().cpu().numpy()
        # The colour scale is capped slightly above the largest weight.
        colour_max = A.max() + 0.01
        ax = sns.heatmap(A, vmin=0, vmax=colour_max)
        plt.show()