In [1]:
!pip install torchmetrics

Collecting torchmetrics
  Downloading torchmetrics-1.2.1-py3-none-any.whl (806 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m806.1/806.1 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting lightning-utilities>=0.8.0 (from torchmetrics)
  Downloading lightning_utilities-0.10.0-py3-none-any.whl (24 kB)
Installing collected packages: lightning-utilities, torchmetrics
Successfully installed lightning-utilities-0.10.0 torchmetrics-1.2.1


In [2]:
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/MyDrive/Colab Notebooks/인공지능프로젝트2/PatchTST')

Mounted at /content/drive


In [3]:
%cd /content/drive/MyDrive/Colab Notebooks/인공지능프로젝트2/PatchTST

/content/drive/MyDrive/Colab Notebooks/인공지능프로젝트2/PatchTST


In [4]:
import argparse

import os
import time

import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from torch import optim
from torch.optim import lr_scheduler
from torch import Tensor

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from typing import Callable, List, Dict, Union, Optional
from datetime import datetime
from sklearn.preprocessing import StandardScaler

from torchmetrics.functional.regression import spearman_corrcoef
from torchmetrics.regression import PearsonCorrCoef

from timefeatures import time_features
from exp_basic import Exp_Basic
from patchtst_tools import EarlyStopping, adjust_learning_rate, visual, test_params_flop
from metrics import metric
from PatchTST_backbone import PatchTST_backbone
from PatchTST_layers import series_decomp

import warnings

warnings.filterwarnings('ignore')

In [5]:
def read_wsdm23_file(file_path: str,) -> Dict[str, pd.DataFrame]:
  """Load a pickled ``{key: DataFrame}`` mapping from a ``.npy`` file.

  The file is read with ``allow_pickle=True`` and unwrapped with ``.item()``.
  Every frame's index is renamed to ``'date'`` in place before the mapping is
  returned.

  NOTE(review): ``allow_pickle=True`` executes arbitrary pickled code; only
  point this at files from a trusted source.
  """
  frames_by_key = np.load(file_path, allow_pickle=True).item()
  for key in frames_by_key:
    frames_by_key[key].index.name = 'date'
  return frames_by_key

def load_dataset(file_path: str = '/content/drive/MyDrive/Colab Notebooks/인공지능프로젝트2/baseline_data_sp500.npy'):
  """Read the baseline ``.npy`` file and return its ``{key: DataFrame}`` mapping.

  Args:
    file_path: location of the pickled ``.npy`` file. Defaults to the Google
      Drive path this notebook was written against, so ``load_dataset()``
      behaves exactly as before.

  Returns:
    Whatever ``read_wsdm23_file`` returns for ``file_path``.
  """
  return read_wsdm23_file(file_path)

# Load the mapping once at notebook scope; the following cell copies `df` to build the long-format frame.
df = load_dataset()

In [6]:
# Flatten the {key: frame} mapping into one long frame with a fixed column order.
# The mapping key itself is not carried into the rows, so consecutive rows may
# come from different source frames.
dataset = df.copy()
value_columns = ('close', 'open', 'high', 'low', 'volume', 'adjclose')
df_list = []
for key, frame in dataset.items():
  record = {'date': frame.index}
  for column in value_columns:
    record[column] = frame[column].values
  df_list.append(pd.DataFrame(record))
dataset = pd.concat(df_list, ignore_index=True)
dataset['date'] = pd.to_datetime(dataset['date'])
dataset

Unnamed: 0,date,close,open,high,low,volume,adjclose
0,2012-05-14,36.299999,36.360001,36.840000,35.979999,230900.0,32.237789
1,2012-05-15,36.750000,36.139999,37.119999,36.139999,328500.0,32.637432
2,2012-05-16,36.619999,36.950001,37.080002,36.549999,220300.0,32.521984
3,2012-05-17,34.979999,36.540001,36.540001,34.830002,418400.0,31.065506
4,2012-05-18,35.139999,34.900002,35.660000,34.779999,200300.0,31.207592
...,...,...,...,...,...,...,...
1197319,2022-05-19,90.250000,91.379997,92.459999,89.540001,5234700.0,90.250000
1197320,2022-05-20,90.080002,90.839996,91.169998,88.430000,6585500.0,90.080002
1197321,2022-05-23,91.830002,90.599998,92.019997,90.000000,4701300.0,91.830002
1197322,2022-05-24,93.209999,91.199997,93.419998,90.470001,5932000.0,93.209999


In [7]:
# Chronological split boundaries: train is [train_start, valid_start),
# validation is [valid_start, test_start), test is [test_start, test_end].
train_start = datetime(2012, 5, 14)
valid_start = datetime(2020, 5, 22)
test_start = datetime(2021, 5, 22)
test_end = datetime(2022, 5, 25)

train_dataset = dataset.loc[dataset['date'].ge(train_start) & dataset['date'].lt(valid_start)]
valid_dataset = dataset.loc[dataset['date'].ge(valid_start) & dataset['date'].lt(test_start)]
test_dataset = dataset.loc[dataset['date'].ge(test_start) & dataset['date'].le(test_end)]

In [8]:
class SP500(Dataset):
  """Sliding-window dataset over a price frame for sequence forecasting.

  Each item is ``(seq_x, seq_y, seq_x_mark, seq_y_mark)``: an input window of
  ``seq_len`` rows, a target window of ``label_len + pred_len`` rows that
  overlaps the last ``label_len`` input rows, and the matching time-feature
  rows for both windows.

  Args:
    dataset: frame to window over; must contain a ``'date'`` column. With
      ``features`` in ``('M', 'MS')`` every column after the first is used as
      data, so ``'date'`` is expected to be the first column.
    flag: one of ``'train'``, ``'val'``, ``'test'``; selects the row range.
    size: ``[seq_len, label_len, pred_len]``; ``None`` means ``[96, 5, 5]``.
    features: ``'M'``/``'MS'`` (all data columns) or ``'S'`` (only ``target``).
    target: column used when ``features == 'S'``; it must exist in the frame.
    scale: standardise the data with a ``StandardScaler`` fitted on the first
      row range (``border1s[0]:border2s[0]``) of the frame.
    timeenc: ``0`` for integer month/day/weekday/hour columns, ``1`` to use
      ``time_features`` with ``freq``.
    freq: frequency string forwarded to ``time_features``.
  """
  def __init__(self, dataset=dataset, flag='train', size=None,
                features='S', target='OT', scale=True, timeenc=0, freq='d'):
    # size [seq_len, label_len, pred_len]
    # info
    if size is None:
      self.seq_len = 96
      self.label_len = 5
      self.pred_len = 5
    else:
      self.seq_len = size[0]
      self.label_len = size[1]
      self.pred_len = size[2]
    # init
    assert flag in ['train', 'test', 'val']
    type_map = {'train': 0, 'val': 1, 'test': 2}
    self.set_type = type_map[flag]

    self.features = features
    self.target = target
    self.scale = scale
    self.timeenc = timeenc
    self.freq = freq
    self.dataset = dataset
    self.__read_data__()

  def __read_data__(self):
    """Slice, optionally scale, and time-encode the frame given to the constructor."""
    self.scaler = StandardScaler()
    # Use the frame passed to the constructor; reading the notebook-level
    # `dataset` here would silently ignore the split handed in by the caller.
    df_raw = self.dataset

    # Row ranges per flag (train, val, test). val/test start `seq_len` rows
    # early so their first window has a full input history.
    # NOTE(review): the 12*30*24 / 4*30*24 offsets are fixed row counts, not
    # derived from the frame's length -- confirm they suit this data.
    border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
    border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
    border1 = border1s[self.set_type]
    border2 = border2s[self.set_type]

    if self.features == 'M' or self.features == 'MS':
      cols_data = df_raw.columns[1:]
      df_data = df_raw[cols_data]
    elif self.features == 'S':
      df_data = df_raw[[self.target]]
    else:
      raise ValueError("features must be one of 'M', 'MS', 'S', got {!r}".format(self.features))

    if self.scale:
      # The scaler is fitted on the first row range of whichever frame was
      # passed in, then applied to the whole frame.
      train_data = df_data[border1s[0]:border2s[0]]
      self.scaler.fit(train_data.values)
      data = self.scaler.transform(df_data.values)
    else:
      data = df_data.values

    # Copy so the column assignments below never write into a view of df_raw.
    df_stamp = df_raw[['date']][border1:border2].copy()
    df_stamp['date'] = pd.to_datetime(df_stamp.date)
    if self.timeenc == 0:
      df_stamp['month'] = df_stamp.date.dt.month
      df_stamp['day'] = df_stamp.date.dt.day
      df_stamp['weekday'] = df_stamp.date.dt.weekday
      df_stamp['hour'] = df_stamp.date.dt.hour
      data_stamp = df_stamp.drop(['date'], axis=1).values
    elif self.timeenc == 1:
      data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
      data_stamp = data_stamp.transpose(1, 0)
    else:
      raise ValueError("timeenc must be 0 or 1, got {!r}".format(self.timeenc))

    self.data_x = data[border1:border2]
    self.data_y = data[border1:border2]
    self.data_stamp = data_stamp

  def __getitem__(self, index):
    """Return the input window starting at ``index`` and its overlapping target window."""
    s_begin = index
    s_end = s_begin + self.seq_len
    # The target window starts `label_len` rows before the input window ends.
    r_begin = s_end - self.label_len
    r_end = r_begin + self.label_len + self.pred_len

    seq_x = self.data_x[s_begin:s_end]
    seq_y = self.data_y[r_begin:r_end]
    seq_x_mark = self.data_stamp[s_begin:s_end]
    seq_y_mark = self.data_stamp[r_begin:r_end]

    return seq_x, seq_y, seq_x_mark, seq_y_mark

  def __len__(self):
    """Number of start positions that leave room for ``seq_len + pred_len`` rows."""
    return len(self.data_x) - self.seq_len - self.pred_len + 1

  def inverse_transform(self, data):
    """Undo the scaling applied in ``__read_data__`` using the fitted scaler."""
    return self.scaler.inverse_transform(data)

In [9]:
def data_provider(args, flag):
  """Build the ``SP500`` dataset and its ``DataLoader`` for one split.

  ``flag == 'test'`` uses the test frame, batch size 1, no shuffling and keeps
  the last partial batch. ``flag == 'val'`` uses the validation frame with
  ``args.batch_size``, no shuffling, dropping the last partial batch. Any other
  flag is treated as training: training frame, shuffled, last batch dropped.

  Returns:
    ``(data_set, data_loader)``.
  """
  Data = SP500
  timeenc = 1 if args.embed == 'timeF' else 0

  if flag == 'test':
    split_frame = test_dataset
    shuffle_flag, drop_last, batch_size = False, False, 1
  elif flag == 'val':
    split_frame = valid_dataset
    shuffle_flag, drop_last, batch_size = False, True, args.batch_size
  else:
    split_frame = train_dataset
    shuffle_flag, drop_last, batch_size = True, True, args.batch_size
  freq = args.freq

  window_sizes = [args.seq_len, args.label_len, args.pred_len]
  data_set = Data(
      dataset=split_frame,
      flag=flag,
      size=window_sizes,
      features=args.features,
      target=args.target,
      timeenc=timeenc,
      freq=freq)

  print(flag, len(data_set))
  loader_options = dict(
      batch_size=batch_size,
      shuffle=shuffle_flag,
      num_workers=args.num_workers,
      drop_last=drop_last)
  data_loader = DataLoader(data_set, **loader_options)
  return data_set, data_loader

In [10]:
# NOTE(review): no name 'PatchTST' is defined in this cell (the class below is `Model`),
# so a star-import of this code as a module would fail -- confirm the intended export.
__all__ = ['PatchTST']

class Model(nn.Module):
  """Wrapper around ``PatchTST_backbone`` with optional series decomposition.

  With ``configs.decomposition`` truthy, the input is split by
  ``series_decomp(configs.kernel_size)`` into two components, each component is
  fed to its own backbone, and the two outputs are summed. Otherwise a single
  backbone handles the input directly.
  """
  def __init__(self, configs, max_seq_len:Optional[int]=1024, d_k:Optional[int]=None, d_v:Optional[int]=None, norm:str='BatchNorm', attn_dropout:float=0.,
              act:str="gelu", key_padding_mask:bool='auto',padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True,
              pre_norm:bool=False, store_attn:bool=False, pe:str='zeros', learn_pe:bool=True, pretrain_head:bool=False, head_type = 'flatten', verbose:bool=False, **kwargs):

    super().__init__()

    # Every backbone in this model is built from the same settings, so gather
    # them once: values from `configs` first, then the constructor arguments.
    backbone_kwargs = dict(
      c_in=configs.enc_in,
      context_window=configs.seq_len,
      target_window=configs.pred_len,
      n_layers=configs.e_layers,
      n_heads=configs.n_heads,
      d_model=configs.d_model,
      d_ff=configs.d_ff,
      dropout=configs.dropout,
      fc_dropout=configs.fc_dropout,
      head_dropout=configs.head_dropout,
      individual=configs.individual,
      patch_len=configs.patch_len,
      stride=configs.stride,
      padding_patch=configs.padding_patch,
      revin=configs.revin,
      affine=configs.affine,
      subtract_last=configs.subtract_last,
      max_seq_len=max_seq_len, d_k=d_k, d_v=d_v, norm=norm, attn_dropout=attn_dropout,
      act=act, key_padding_mask=key_padding_mask, padding_var=padding_var,
      attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm,
      store_attn=store_attn, pe=pe, learn_pe=learn_pe, pretrain_head=pretrain_head,
      head_type=head_type, verbose=verbose, **kwargs)

    # Both are read unconditionally, so a config missing either one fails here.
    self.decomposition = configs.decomposition
    kernel_size = configs.kernel_size

    if self.decomposition:
      self.decomp_module = series_decomp(kernel_size)
      # Trend backbone is created before the residual one (keeps parameter
      # initialisation order stable under a fixed seed).
      self.model_trend = PatchTST_backbone(**backbone_kwargs)
      self.model_res = PatchTST_backbone(**backbone_kwargs)
    else:
      self.model = PatchTST_backbone(**backbone_kwargs)


  def forward(self, x):           # x: [Batch, Input length, Channel]
    """Run the backbone(s) on ``x`` and return a tensor with the last two axes in input order."""
    if not self.decomposition:
      # Backbone works on [Batch, Channel, Input length]; swap back afterwards.
      out = self.model(x.permute(0,2,1))
      return out.permute(0,2,1)

    res_init, trend_init = self.decomp_module(x)
    res_init = res_init.permute(0,2,1)      # [Batch, Channel, Input length]
    trend_init = trend_init.permute(0,2,1)  # [Batch, Channel, Input length]
    res = self.model_res(res_init)
    trend = self.model_trend(trend_init)
    combined = res + trend
    return combined.permute(0,2,1)          # [Batch, Input length, Channel]

In [11]:
class Exp_Main(Exp_Basic):
  """Training / validation / test driver for ``Model`` on the SP500 data.

  Relies on ``self.args``, ``self.model`` and ``self.device``.
  NOTE(review): these are presumably set up by ``Exp_Basic.__init__`` (not
  visible in this notebook) -- confirm against exp_basic.py.
  """
  def __init__(self, args):
    super(Exp_Main, self).__init__(args)

  def _build_model(self):
    """Create ``Model`` in float32, wrapped in ``DataParallel`` when multi-GPU is enabled."""
    model = Model(self.args).float()

    if self.args.use_multi_gpu and self.args.use_gpu:
      model = nn.DataParallel(model, device_ids=self.args.device_ids)
    return model

  def _get_data(self, flag):
    """Return ``(dataset, loader)`` for the given split flag."""
    data_set, data_loader = data_provider(self.args, flag)
    return data_set, data_loader

  def _select_optimizer(self):
    """Adam over all model parameters with ``args.learning_rate``."""
    model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
    return model_optim

  def _select_criterion(self):
    """Mean-squared-error loss."""
    criterion = nn.MSELoss()
    return criterion

  def vali(self, vali_data, vali_loader, criterion):
    """Average ``criterion`` over ``vali_loader`` without gradients.

    The model is switched to eval mode for the pass and back to train mode
    before returning.

    Returns:
      Mean of the per-batch losses.
    """
    total_loss = []
    self.model.eval()
    with torch.no_grad():
      for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float()

        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)

        # decoder input: the first label_len target rows followed by zeros for the horizon
        dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
        # encoder - decoder
        if self.args.use_amp:
          with torch.cuda.amp.autocast():
            if 'Linear' in self.args.model or 'TST' in self.args.model:
              outputs = self.model(batch_x)
            else:
              if self.args.output_attention:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
              else:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        else:
          if 'Linear' in self.args.model or 'TST' in self.args.model:
            outputs = self.model(batch_x)
          else:
            if self.args.output_attention:
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
            else:
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        # 'MS' keeps only the last channel; otherwise all channels are kept.
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

        pred = outputs.detach().cpu()
        true = batch_y.detach().cpu()

        loss = criterion(pred, true)

        # Store plain floats, matching how train() accumulates its losses.
        total_loss.append(loss.item())
    total_loss = np.average(total_loss)
    self.model.train()
    return total_loss

  def train(self, setting):
    """Train with early stopping and return the model reloaded from the best checkpoint.

    Args:
      setting: run identifier; checkpoints are written under
        ``os.path.join(args.checkpoints, setting)``.
    """
    train_data, train_loader = self._get_data(flag='train')
    vali_data, vali_loader = self._get_data(flag='val')

    path = os.path.join(self.args.checkpoints, setting)
    os.makedirs(path, exist_ok=True)

    time_now = time.time()

    train_steps = len(train_loader)
    early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

    model_optim = self._select_optimizer()
    criterion = self._select_criterion()

    if self.args.use_amp:
      scaler = torch.cuda.amp.GradScaler()

    scheduler = lr_scheduler.OneCycleLR(optimizer = model_optim,
                                        steps_per_epoch = train_steps,
                                        pct_start = self.args.pct_start,
                                        epochs = self.args.train_epochs,
                                        max_lr = self.args.learning_rate)

    for epoch in range(self.args.train_epochs):
      iter_count = 0
      train_loss = []

      self.model.train()
      epoch_time = time.time()
      for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
        iter_count += 1
        model_optim.zero_grad()
        batch_x = batch_x.float().to(self.device)

        batch_y = batch_y.float().to(self.device)
        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)

        # decoder input
        dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)

        # encoder - decoder
        if self.args.use_amp:
          with torch.cuda.amp.autocast():
            if 'Linear' in self.args.model or 'TST' in self.args.model:
              outputs = self.model(batch_x)
            else:
              if self.args.output_attention:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
              else:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

            f_dim = -1 if self.args.features == 'MS' else 0
            outputs = outputs[:, -self.args.pred_len:, f_dim:]
            batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
            loss = criterion(outputs, batch_y)
            train_loss.append(loss.item())
        else:
          if 'Linear' in self.args.model or 'TST' in self.args.model:
            outputs = self.model(batch_x)
          else:
            if self.args.output_attention:
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]

            else:
              # NOTE(review): unlike every other call site, this one also passes batch_y -- confirm intent.
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark, batch_y)
          f_dim = -1 if self.args.features == 'MS' else 0
          outputs = outputs[:, -self.args.pred_len:, f_dim:]
          batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
          loss = criterion(outputs, batch_y)
          train_loss.append(loss.item())

        if (i + 1) % 100 == 0:
          print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
          speed = (time.time() - time_now) / iter_count
          left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
          print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
          iter_count = 0
          time_now = time.time()

        if self.args.use_amp:
          scaler.scale(loss).backward()
          scaler.step(model_optim)
          scaler.update()
        else:
          loss.backward()
          model_optim.step()

        # The one-cycle scheduler is only stepped (per batch) for lradj == 'TST'.
        if self.args.lradj == 'TST':
          adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
          scheduler.step()

      print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
      train_loss = np.average(train_loss)
      vali_loss = self.vali(vali_data, vali_loader, criterion)

      print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f}".format(
        epoch + 1, train_steps, train_loss, vali_loss))
      early_stopping(vali_loss, self.model, path)
      if early_stopping.early_stop:
        print("Early stopping")
        break

      if self.args.lradj != 'TST':
        adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args)
      else:
        print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

    best_model_path = path + '/' + 'checkpoint.pth'
    self.model.load_state_dict(torch.load(best_model_path))

    return self.model

  def test(self, setting, test=0):
    """Predict on the test split, plot samples, log metrics and save predictions.

    Args:
      setting: run identifier used for the checkpoint, plot and result folders.
      test: when truthy, load ``checkpoint.pth`` for ``setting`` from
        ``args.checkpoints`` before predicting.

    Returns:
      ``(preds, trues)`` as arrays reshaped to ``(-1, rows, channels)``.
    """
    test_data, test_loader = self._get_data(flag='test')

    if test:
      print('loading model')
      # Read from the same directory train() writes to.
      self.model.load_state_dict(torch.load(os.path.join(self.args.checkpoints, setting, 'checkpoint.pth')))

    preds = []
    trues = []
    inputx = []
    folder_path = './test_results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    self.model.eval()
    with torch.no_grad():
      for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
        batch_x = batch_x.float().to(self.device)
        batch_y = batch_y.float().to(self.device)

        batch_x_mark = batch_x_mark.float().to(self.device)
        batch_y_mark = batch_y_mark.float().to(self.device)

        # decoder input
        dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
        dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
        # encoder - decoder
        if self.args.use_amp:
          with torch.cuda.amp.autocast():
            if 'Linear' in self.args.model or 'TST' in self.args.model:
              outputs = self.model(batch_x)
            else:
              if self.args.output_attention:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
              else:
                outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
        else:
          if 'Linear' in self.args.model or 'TST' in self.args.model:
            outputs = self.model(batch_x)
          else:
            if self.args.output_attention:
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
            else:
              outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

        # Post-processing sits at loop level so predictions are collected on
        # both the AMP and the non-AMP path.
        f_dim = -1 if self.args.features == 'MS' else 0
        outputs = outputs[:, -self.args.pred_len:, f_dim:]
        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
        pred = outputs.detach().cpu().numpy()
        true = batch_y.detach().cpu().numpy()

        preds.append(pred)
        trues.append(true)
        history = batch_x.detach().cpu().numpy()
        inputx.append(history)
        if i % 20 == 0:
          # Plot the last channel of the first sample: input history followed
          # by the ground truth / the forecast.
          gt = np.concatenate((history[0, :, -1], true[0, :, -1]), axis=0)
          forecast = np.concatenate((history[0, :, -1], pred[0, :, -1]), axis=0)
          visual(gt, forecast, os.path.join(folder_path, str(i) + '.pdf'))

    if self.args.test_flop:
      test_params_flop((batch_x.shape[1],batch_x.shape[2]))
      exit()
    preds = np.array(preds)
    trues = np.array(trues)
    inputx = np.array(inputx)

    preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
    trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
    inputx = inputx.reshape(-1, inputx.shape[-2], inputx.shape[-1])

    # result save
    folder_path = './results/' + setting + '/'
    os.makedirs(folder_path, exist_ok=True)

    mae, mse, rmse, mape, mspe, rse, corr = metric(preds, trues)
    print('mse:{}, mae:{}, rse:{}'.format(mse, mae, rse))
    # Append to the running log; `with` closes the handle even if a write fails.
    with open("result.txt", 'a') as f:
      f.write(setting + "  \n")
      f.write('mse:{}, mae:{}, rse:{}'.format(mse, mae, rse))
      f.write('\n')
      f.write('\n')

    np.save(folder_path + 'pred.npy', preds)
    return preds, trues

In [12]:
# Train-and-test cell: builds the experiment configuration from argparse defaults
# (no command line is parsed), then runs `args.itr` train/test rounds, or a single
# test-only round with correlation metrics when `is_training` is false.
if __name__ == '__main__':
  parser = argparse.ArgumentParser(description='Autoformer & Transformer family for Time Series Forecasting')

  # random seed
  parser.add_argument('--random_seed', type=int, default=2021, help='random seed')

  # basic config
  # NOTE: argparse `type=bool` turns any non-empty string into True; only the
  # defaults are used here because parse_args receives an empty list.
  parser.add_argument('--is_training', type=bool, default=True, help='status')
  parser.add_argument('--model_id', type=str, default='train', help='model id')
  parser.add_argument('--model', type=str, default='PatchTST')

  # data loader
  parser.add_argument('--data', type=str, default='SP500', help='dataset type')
  parser.add_argument('--dataset', default=dataset)
  parser.add_argument('--features', type=str, default='M',
                      help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
  parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
  parser.add_argument('--freq', type=str, default='h',
                      help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
  parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

  # forecasting task
  parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
  parser.add_argument('--label_len', type=int, default=5, help='start token length')
  parser.add_argument('--pred_len', type=int, default=5, help='prediction sequence length')

  # PatchTST
  parser.add_argument('--fc_dropout', type=float, default=0.05, help='fully connected dropout')
  parser.add_argument('--head_dropout', type=float, default=0.0, help='head dropout')
  parser.add_argument('--patch_len', type=int, default=16, help='patch length')
  parser.add_argument('--stride', type=int, default=8, help='stride')
  parser.add_argument('--padding_patch', default='end', help='None: None; end: padding on the end')
  parser.add_argument('--revin', type=int, default=1, help='RevIN; True 1 False 0')
  parser.add_argument('--affine', type=int, default=0, help='RevIN-affine; True 1 False 0')
  parser.add_argument('--subtract_last', type=int, default=0, help='0: subtract mean; 1: subtract last')
  parser.add_argument('--decomposition', type=int, default=0, help='decomposition; True 1 False 0')
  parser.add_argument('--kernel_size', type=int, default=25, help='decomposition-kernel')
  parser.add_argument('--individual', type=int, default=0, help='individual head; True 1 False 0')

  # Formers
  parser.add_argument('--embed_type', type=int, default=0, help='0: default 1: value embedding + temporal embedding + positional embedding 2: value embedding + temporal embedding 3: value embedding + positional embedding 4: value embedding')
  # NOTE(review): the frame built earlier in this notebook has six value columns
  # besides 'date'; confirm that enc_in/dec_in/c_out = 7 is intended.
  parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') # DLinear with --individual, use this hyperparameter as the number of channels
  parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
  parser.add_argument('--c_out', type=int, default=7, help='output size')
  parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
  parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
  parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
  parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
  parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
  parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
  parser.add_argument('--factor', type=int, default=1, help='attn factor')
  parser.add_argument('--distil', action='store_false',
                      help='whether to use distilling in encoder, using this argument means not using distilling',
                      default=True)
  parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
  parser.add_argument('--embed', type=str, default='timeF',
                      help='time features encoding, options:[timeF, fixed, learned]')
  parser.add_argument('--activation', type=str, default='gelu', help='activation')
  parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
  parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data')

  # optimization
  parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
  parser.add_argument('--itr', type=int, default=2, help='experiments times')
  parser.add_argument('--train_epochs', type=int, default=20, help='train epochs')
  parser.add_argument('--batch_size', type=int, default=128, help='batch size of train input data')
  parser.add_argument('--patience', type=int, default=100, help='early stopping patience')
  parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
  parser.add_argument('--des', type=str, default='test', help='exp description')
  parser.add_argument('--loss', type=str, default='mse', help='loss function')
  parser.add_argument('--lradj', type=str, default='type3', help='adjust learning rate')
  parser.add_argument('--pct_start', type=float, default=0.3, help='pct_start')
  parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

  # GPU
  parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
  parser.add_argument('--gpu', type=int, default=0, help='gpu')
  parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
  parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')
  parser.add_argument('--test_flop', action='store_true', default=False, help='See utils/tools for usage')

  # Parse an empty argv so the notebook kernel's own arguments are ignored.
  args = parser.parse_args(args=[])

  # random seed
  fix_seed = args.random_seed
  random.seed(fix_seed)
  torch.manual_seed(fix_seed)
  np.random.seed(fix_seed)

  args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False

  if args.use_gpu and args.use_multi_gpu:
    # Strip spaces from the device list itself so the split below sees the cleaned string.
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]

  print('Args in experiment:')
  print(args)

  Exp = Exp_Main

  def build_setting(run_index):
    """Return the run identifier used for checkpoint and result folder names."""
    return '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'\
               .format(args.model_id, args.model, args.data, args.features, args.seq_len,\
                       args.label_len, args.pred_len, args.d_model, args.n_heads, args.e_layers,\
                       args.d_layers, args.d_ff, args.factor, args.embed, args.distil, args.des, run_index)

  if args.is_training:
    for ii in range(args.itr):
      # setting record of experiments
      setting = build_setting(ii)

      exp = Exp(args)  # set experiments
      print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
      exp.train(setting)

      print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
      exp.test(setting)

      torch.cuda.empty_cache()
  else:
    ii = 0
    setting = build_setting(ii)

    exp = Exp(args)  # set experiments
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    preds, trues = exp.test(setting, test=1)

    preds = torch.tensor(preds, dtype=torch.float32)
    trues = torch.tensor(trues, dtype=torch.float32)
    # Flatten to one column whatever the number of test windows, rather than
    # hard-coding the element count of one particular run.
    preds = preds.reshape(-1, 1).cpu()
    trues = trues.reshape(-1, 1).cpu()
    rankIC = spearman_corrcoef(preds, trues)
    print('RankIC: {}'.format(rankIC))
    # NOTE(review): the value printed as "RankIR" is a Pearson correlation
    # between predictions and targets -- confirm this is the intended metric.
    rankIR = PearsonCorrCoef()
    rankIR(preds, trues)
    print('RankIR: {}'.format(rankIR.compute()))

    torch.cuda.empty_cache()

Args in experiment:
Namespace(random_seed=2021, is_training=True, model_id='train', model='PatchTST', data='SP500', dataset=              date      close       open       high        low     volume  \
0       2012-05-14  36.299999  36.360001  36.840000  35.979999   230900.0   
1       2012-05-15  36.750000  36.139999  37.119999  36.139999   328500.0   
2       2012-05-16  36.619999  36.950001  37.080002  36.549999   220300.0   
3       2012-05-17  34.979999  36.540001  36.540001  34.830002   418400.0   
4       2012-05-18  35.139999  34.900002  35.660000  34.779999   200300.0   
...            ...        ...        ...        ...        ...        ...   
1197319 2022-05-19  90.250000  91.379997  92.459999  89.540001  5234700.0   
1197320 2022-05-20  90.080002  90.839996  91.169998  88.430000  6585500.0   
1197321 2022-05-23  91.830002  90.599998  92.019997  90.000000  4701300.0   
1197322 2022-05-24  93.209999  91.199997  93.419998  90.470001  5932000.0   
1197323 2022-05-25  93.559998

In [13]:
if __name__ == '__main__':
  # Driver cell: builds the experiment configuration with argparse (defaults only,
  # since parse_args is called with an empty argument list), seeds the RNGs, then
  # either trains + tests `args.itr` times or runs a single test pass and reports
  # correlation metrics between predictions and ground truth.

  def _str2bool(value):
    """Parse a boolean option value.

    `type=bool` must not be used with argparse: bool('False') is True, so every
    non-empty string would enable the flag. The empty string still maps to
    False, matching what bool('') returned before.

    Raises:
      argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
      return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
      return True
    if text in ('false', 'f', 'no', 'n', '0', ''):
      return False
    raise argparse.ArgumentTypeError('boolean value expected, got {!r}'.format(value))

  def _build_setting(args, ii):
    """Return the run identifier string handed to exp.train / exp.test.

    Args:
      args: parsed argparse.Namespace holding the experiment configuration.
      ii: index of the current repetition (0 for a test-only run).
    """
    return '{}_{}_{}_ft{}_sl{}_ll{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
        args.model_id, args.model, args.data, args.features, args.seq_len,
        args.label_len, args.pred_len, args.d_model, args.n_heads, args.e_layers,
        args.d_layers, args.d_ff, args.factor, args.embed, args.distil, args.des, ii)

  parser = argparse.ArgumentParser(description='Autoformer & Transformer family for Time Series Forecasting')

  # random seed
  parser.add_argument('--random_seed', type=int, default=2021, help='random seed')

  # basic config
  parser.add_argument('--is_training', type=_str2bool, default=False, help='status')
  parser.add_argument('--model_id', type=str, default='train', help='model id')
  parser.add_argument('--model', type=str, default='PatchTST')

  # data loader
  parser.add_argument('--data', type=str, default='SP500', help='dataset type')
  # `dataset` is an object defined in an earlier cell; it is carried on the
  # Namespace as-is, so print(args) below includes its full repr.
  parser.add_argument('--dataset', default=dataset)
  parser.add_argument('--features', type=str, default='M',
                      help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
  parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
  parser.add_argument('--freq', type=str, default='h',
                      help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
  parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')

  # forecasting task
  parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
  parser.add_argument('--label_len', type=int, default=5, help='start token length')
  parser.add_argument('--pred_len', type=int, default=5, help='prediction sequence length')


  # DLinear
  #parser.add_argument('--individual', action='store_true', default=False, help='DLinear: a linear layer for each variate(channel) individually')

  # PatchTST
  parser.add_argument('--fc_dropout', type=float, default=0.05, help='fully connected dropout')
  parser.add_argument('--head_dropout', type=float, default=0.0, help='head dropout')
  parser.add_argument('--patch_len', type=int, default=16, help='patch length')
  parser.add_argument('--stride', type=int, default=8, help='stride')
  parser.add_argument('--padding_patch', default='end', help='None: None; end: padding on the end')
  parser.add_argument('--revin', type=int, default=1, help='RevIN; True 1 False 0')
  parser.add_argument('--affine', type=int, default=0, help='RevIN-affine; True 1 False 0')
  parser.add_argument('--subtract_last', type=int, default=0, help='0: subtract mean; 1: subtract last')
  parser.add_argument('--decomposition', type=int, default=0, help='decomposition; True 1 False 0')
  parser.add_argument('--kernel_size', type=int, default=25, help='decomposition-kernel')
  parser.add_argument('--individual', type=int, default=0, help='individual head; True 1 False 0')

  # Formers
  parser.add_argument('--embed_type', type=int, default=0, help='0: default 1: value embedding + temporal embedding + positional embedding 2: value embedding + temporal embedding 3: value embedding + positional embedding 4: value embedding')
  parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') # DLinear with --individual, use this hyperparameter as the number of channels
  parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
  parser.add_argument('--c_out', type=int, default=7, help='output size')
  parser.add_argument('--d_model', type=int, default=512, help='dimension of model')
  parser.add_argument('--n_heads', type=int, default=8, help='num of heads')
  parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
  parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
  parser.add_argument('--d_ff', type=int, default=2048, help='dimension of fcn')
  parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
  parser.add_argument('--factor', type=int, default=1, help='attn factor')
  parser.add_argument('--distil', action='store_false',
                      help='whether to use distilling in encoder, using this argument means not using distilling',
                      default=True)
  parser.add_argument('--dropout', type=float, default=0.05, help='dropout')
  parser.add_argument('--embed', type=str, default='timeF',
                      help='time features encoding, options:[timeF, fixed, learned]')
  parser.add_argument('--activation', type=str, default='gelu', help='activation')
  parser.add_argument('--output_attention', action='store_true', help='whether to output attention in ecoder')
  parser.add_argument('--do_predict', action='store_true', help='whether to predict unseen future data')

  # optimization
  parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
  parser.add_argument('--itr', type=int, default=2, help='experiments times')
  parser.add_argument('--train_epochs', type=int, default=20, help='train epochs')
  parser.add_argument('--batch_size', type=int, default=128, help='batch size of train input data')
  parser.add_argument('--patience', type=int, default=100, help='early stopping patience')
  parser.add_argument('--learning_rate', type=float, default=0.0001, help='optimizer learning rate')
  parser.add_argument('--des', type=str, default='test', help='exp description')
  parser.add_argument('--loss', type=str, default='mse', help='loss function')
  parser.add_argument('--lradj', type=str, default='type3', help='adjust learning rate')
  parser.add_argument('--pct_start', type=float, default=0.3, help='pct_start')
  parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)

  # GPU
  parser.add_argument('--use_gpu', type=_str2bool, default=True, help='use gpu')
  parser.add_argument('--gpu', type=int, default=0, help='gpu')
  parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
  parser.add_argument('--devices', type=str, default='0,1,2,3', help='device ids of multile gpus')
  parser.add_argument('--test_flop', action='store_true', default=False, help='See utils/tools for usage')

  # An empty argument list means only the defaults above are used; the notebook
  # kernel's own sys.argv is deliberately not parsed.
  args = parser.parse_args(args=[])

  # random seed
  fix_seed = args.random_seed
  random.seed(fix_seed)
  torch.manual_seed(fix_seed)
  np.random.seed(fix_seed)

  # Fall back to CPU when CUDA is unavailable, even if the GPU was requested.
  args.use_gpu = bool(torch.cuda.is_available() and args.use_gpu)

  if args.use_gpu and args.use_multi_gpu:
    # Normalise the device list in place (previously the stripped string was
    # written to a misspelled attribute and never used).
    args.devices = args.devices.replace(' ', '')
    device_ids = args.devices.split(',')
    args.device_ids = [int(id_) for id_ in device_ids]
    args.gpu = args.device_ids[0]

  print('Args in experiment:')
  print(args)

  Exp = Exp_Main

  if args.is_training:
    for ii in range(args.itr):
      # setting record of experiments
      setting = _build_setting(args, ii)

      exp = Exp(args)  # set experiments
      print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
      exp.train(setting)

      print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
      exp.test(setting)

      torch.cuda.empty_cache()
  else:
    ii = 0
    setting = _build_setting(args, ii)

    exp = Exp(args)  # set experiments
    print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
    preds, trues = exp.test(setting, test=1)

    # Flatten to a single column whatever the number of test samples is; the
    # element count was previously hard-coded, which breaks as soon as the data
    # split, seq_len or pred_len changes.
    preds = torch.tensor(preds, dtype=torch.float32).reshape(-1, 1).cpu()
    trues = torch.tensor(trues, dtype=torch.float32).reshape(-1, 1).cpu()

    # Spearman rank correlation between predictions and targets.
    rankIC = spearman_corrcoef(preds, trues)
    print('RankIC: {}'.format(rankIC))

    # NOTE(review): this is a plain Pearson correlation over all samples; the
    # 'RankIR' label is kept so the printed output is unchanged - confirm that
    # this is the metric intended under that name.
    rankIR = PearsonCorrCoef()
    rankIR(preds, trues)
    print('RankIR: {}'.format(rankIR.compute()))

    torch.cuda.empty_cache()

Args in experiment:
Namespace(random_seed=2021, is_training=False, model_id='train', model='PatchTST', data='SP500', dataset=              date      close       open       high        low     volume  \
0       2012-05-14  36.299999  36.360001  36.840000  35.979999   230900.0   
1       2012-05-15  36.750000  36.139999  37.119999  36.139999   328500.0   
2       2012-05-16  36.619999  36.950001  37.080002  36.549999   220300.0   
3       2012-05-17  34.979999  36.540001  36.540001  34.830002   418400.0   
4       2012-05-18  35.139999  34.900002  35.660000  34.779999   200300.0   
...            ...        ...        ...        ...        ...        ...   
1197319 2022-05-19  90.250000  91.379997  92.459999  89.540001  5234700.0   
1197320 2022-05-20  90.080002  90.839996  91.169998  88.430000  6585500.0   
1197321 2022-05-23  91.830002  90.599998  92.019997  90.000000  4701300.0   
1197322 2022-05-24  93.209999  91.199997  93.419998  90.470001  5932000.0   
1197323 2022-05-25  93.55999