In [1]:
import datetime

import numpy as np
import matplotlib.pyplot as plt

import torch
torch.device('cuda:1')
import torch.nn as nn

from torch.utils.data import DataLoader, random_split
from src.dataset import HFTestDataset
from tqdm import tqdm
import seaborn as sns

import gc
from tst import Transformer
import tst.utilities as ut

In [2]:
# del HFTestDataset
# gc.collect()

In [3]:
tOpt = ut.TrainingOptions(BATCH_SIZE=8,
                          NUM_WORKERS=4,
                          LR=1e-3,
                          EPOCHS=100,
                          N_stack=3,
                          heads=2,
                          query=8,
                          value=8,
                          d_model=128,
                          d_input=43,
                          d_output=1,
                          attention_size = 50,
                          window = 200,
                          padding = 50,
                          chunk_mode = 'window'
                          )

In [4]:
model_path = '../train_dir/model/transformer/param_comb_4/'
model_name = 'ddpTransformer_epoch_60.pth.tar'

In [5]:
import os

In [6]:
model_data = torch.load(os.path.join(model_path, model_name))

In [7]:
os.path.join(model_path, model_name)

'../train_dir/model/transformer/param_comb_4/ddpTransformer_epoch_60.pth.tar'

In [8]:
from collections import OrderedDict
local_state_dict = OrderedDict()
for k, v in model_data['state_dict'].items():
    name = k[7:] # remove 'module.' of DataParallel/DistributedDataParallel
    local_state_dict[name] = v

In [9]:
model_data['state_dict']

OrderedDict([('module.layers_encoding.0._selfAttention._future_mask',
              tensor([[False,  True,  True,  ...,  True,  True,  True],
                      [False, False,  True,  ...,  True,  True,  True],
                      [False, False, False,  ...,  True,  True,  True],
                      ...,
                      [False, False, False,  ..., False,  True,  True],
                      [False, False, False,  ..., False, False,  True],
                      [False, False, False,  ..., False, False, False]], device='cuda:0')),
             ('module.layers_encoding.0._selfAttention._attention_mask',
              tensor([[False, False, False,  ...,  True,  True,  True],
                      [False, False, False,  ...,  True,  True,  True],
                      [False, False, False,  ...,  True,  True,  True],
                      ...,
                      [ True,  True,  True,  ..., False, False, False],
                      [ True,  True,  True,  ..., False, False,

In [10]:
model = Transformer(tOpt)

In [11]:
model.load_state_dict(local_state_dict)

<All keys matched successfully>

In [12]:
def load_data(local_rank, world_size, epoch_idx, total_epoch, mode = 'train'):
    if not epoch_idx:
        epoch_idx = 0
    rs = ut.redis_connection(db = 0)
    world_dict = ut.read_data_from_redis(rs, f'{mode}_world_dict_{world_size}_epoch_{total_epoch}')
    keys = world_dict[epoch_idx][local_rank]

    data = []
    for k in keys:
        k_numpy = ut.read_data_from_redis(rs, k)
        if len(k_numpy) > 4800:
            continue
        data.append(k_numpy)
    rs.close()

    return data

In [13]:
test_data = load_data(0, 1, epoch_idx = None, total_epoch = 1, mode = 'test')

In [14]:
test_dataset = HFTestDataset(test_data[:500], tick_num = tOpt.window)

In [15]:
test_dataloader = DataLoader(test_dataset,
                             batch_size=2,
                             num_workers=1,
                             shuffle = False)

In [16]:
y_pred_all = []
y_all = []

In [17]:
model.eval()

Transformer(
  (layers_encoding): ModuleList(
    (0): Encoder(
      (_selfAttention): MultiHeadAttentionWindow(
        (_W_q): Linear(in_features=128, out_features=16, bias=True)
        (_W_k): Linear(in_features=128, out_features=16, bias=True)
        (_W_v): Linear(in_features=128, out_features=16, bias=True)
        (_W_o): Linear(in_features=16, out_features=128, bias=True)
      )
      (_feedForward): PositionwiseFeedForward(
        (_linear1): Linear(in_features=128, out_features=2048, bias=True)
        (_linear2): Linear(in_features=2048, out_features=128, bias=True)
      )
      (_layerNorm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (_layerNorm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      (_dropout): Dropout(p=0.3, inplace=False)
    )
    (1): Encoder(
      (_selfAttention): MultiHeadAttentionWindow(
        (_W_q): Linear(in_features=128, out_features=16, bias=True)
        (_W_k): Linear(in_features=128, out_features=16, bias=T

In [18]:
model = model.cuda()

In [19]:
with torch.no_grad():
    for x, y in tqdm(test_dataloader, total=len(test_dataloader)):
        y_pred = model(x.cuda()).to('cpu') 
        y_pred_all.append(y_pred.detach().numpy())
        y_all.append(y.detach().numpy())

  2%|██▏                                                                                                                              | 3971/240000 [00:36<35:46, 109.97it/s]


RuntimeError: Too many open files. Communication with the workers is no longer possible. Please increase the limit using `ulimit -n` in the shell or change the sharing strategy by calling `torch.multiprocessing.set_sharing_strategy('file_system')` at the beginning of your code

In [None]:
from scipy.stats import spearmanr

In [None]:
y_pred_concat = np.concatenate(y_pred_all)
y_concat = np.concatenate(y_all)
y_pred_concat = y_pred_concat.reshape(-1, 1)
y_concat = y_concat.reshape(-1, 1)

In [None]:
spearmanr(y_pred_concat, y_concat)

In [None]:
encoder = model.layers_encoding[0]
attn_map = encoder.attention_map[0].cpu()

In [None]:
# Plot
plt.figure(figsize=(12, 12))
sns.heatmap(attn_map)
plt.savefig("attention_map")