In [1]:
import numpy as np
import pandas as pd

import os.path as op
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

torch.manual_seed(42)

IMAGE_WIDTH = {5: 15, 20: 60, 60: 180}
IMAGE_HEIGHT = {5: 32, 20: 64, 60: 96}  

In [2]:
from models import *

## load data

In [3]:
year_list = np.arange(2001,2020,1)

In [4]:
images = []
label_df = []
for year in year_list:
    images.append(np.memmap(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_images.dat"), dtype=np.uint8, mode='r').reshape(
                        (-1, IMAGE_HEIGHT[20], IMAGE_WIDTH[20])))
    label_df.append(pd.read_feather(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_labels_w_delay.feather")))
    
images = np.concatenate(images)
label_df = pd.concat(label_df)

print(images.shape)
print(label_df.shape)

(1403975, 64, 60)
(1403975, 8)


## build dataset

In [16]:
class MyDataset(Dataset):
    
    def __init__(self, img, label):
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)
        self.len = len(img)
  
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return self.img[idx], self.label[idx]

In [17]:
dataset = MyDataset(images, (label_df.Ret_20d > 0).values)

In [18]:
test_dataloader = DataLoader(dataset, batch_size=2048, shuffle=False)

# the neural network

In [73]:
net_path = '/home/clidg/proj_2/pt/baseline_epoch_10_train_0.688653_eval_0.686537_.pt'

In [74]:
net = torch.load(net_path)

In [75]:
net.device_ids = [0,1,2,3]

### have a look

In [21]:
net.eval()

DataParallel(
  (module): Net(
    (layer1): Sequential(
      (0): Conv2d(1, 64, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01, inplace=True)
      (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (layer2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01, inplace=True)
      (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (layer3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Le

## test

In [54]:
def eval_loop(dataloader, net, loss_fn):
    
    size = len(dataloader.dataset)
    running_loss = 0.0
    total_loss = 0.0
    current = 0
    net.eval()
    target = []
    result = []
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to('cuda')
        y = y.to('cuda')
        y_pred = net(X)
        target.append(y.detach().cpu().numpy())
        result.append(y_pred.detach().cpu().numpy())
        loss = loss_fn(y_pred, y.long())
        
        running_loss += loss.item()
        
        total_loss = (len(X) * running_loss + total_loss * current) / (len(X) + current)
        current += len(X)
        running_loss = 0.0
            
    return total_loss, np.concatenate(result), np.concatenate(target)

In [None]:
loss_fn = nn.CrossEntropyLoss()
test_loss, y_pred, y_target = eval_loop(test_dataloader, net, loss_fn)

print(net_path)
print(test_loss)

#np.save('baseline_y_pred.npy', y_pred)
#np.save('baseline_y_target.npy', y_target)
#np.save('baseline_y_ret.npy', label_df.Ret_20d.values)

acc = (np.argmax(y_pred, axis = 1) == y_target).sum()/len(y_pred)
print(acc)


print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='pearson'))

print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='spearman'))

ret = pd.Series(np.concatenate([label_df.Ret_20d.values[np.argmax(y_pred, axis = 1)], 
                -label_df.Ret_20d.values[1-np.argmax(y_pred, axis = 1)]]))

print(ret.mean()/ret.std() * np.sqrt(252/20))

In [64]:
loss_fn = nn.CrossEntropyLoss()
test_loss, y_pred, y_target = eval_loop(test_dataloader, net, loss_fn)

print(net_path)
print(test_loss)

acc = (np.argmax(y_pred, axis = 1) == y_target).sum()/len(y_pred)
print(acc)


print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='pearson'))

print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='spearman'))

ret = pd.Series(np.concatenate([label_df.Ret_20d.values[np.argmax(y_pred, axis = 1)], 
                -label_df.Ret_20d.values[1-np.argmax(y_pred, axis = 1)]]))

print(ret.mean()/ret.std() * np.sqrt(252/20))

/home/clidg/proj_2/pt/gelu_epoch_9_train_0.686015_eval_0.686316_.pt
0.6935985478675337
0.524918891005894
0.003044967773768082
0.004660699432851827
0.050686978323795276


In [68]:
loss_fn = nn.CrossEntropyLoss()
test_loss, y_pred, y_target = eval_loop(test_dataloader, net, loss_fn)

print(net_path)
print(test_loss)

acc = (np.argmax(y_pred, axis = 1) == y_target).sum()/len(y_pred)
print(acc)


print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='pearson'))

print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='spearman'))

ret = pd.Series(np.concatenate([label_df.Ret_20d.values[np.argmax(y_pred, axis = 1)], 
                -label_df.Ret_20d.values[1-np.argmax(y_pred, axis = 1)]]))

print(ret.mean()/ret.std() * np.sqrt(252/20))

/home/clidg/proj_2/pt/gelu_ln_epoch_10_train_0.681694_eval_0.685956_.pt
0.692950108825877
0.526350540429851
0.0032196061346426988
0.004933092240856449
0.14208357799707144


In [72]:
loss_fn = nn.CrossEntropyLoss()
test_loss, y_pred, y_target = eval_loop(test_dataloader, net, loss_fn)

print(net_path)
print(test_loss)

acc = (np.argmax(y_pred, axis = 1) == y_target).sum()/len(y_pred)
print(acc)

print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='pearson'))

print(pd.Series(y_pred[:,1]).corr(label_df.Ret_20d, method='spearman'))

ret = pd.Series(np.concatenate([label_df.Ret_20d.values[np.argmax(y_pred, axis = 1)], 
                -label_df.Ret_20d.values[1-np.argmax(y_pred, axis = 1)]]))

print(ret.mean()/ret.std() * np.sqrt(252/20))

/home/clidg/proj_2/pt/ln_epoch_13_train_0.680397_eval_0.685894_.pt
0.6923807588988327
0.5285528588472017
0.002919619184014595
0.004514477115001684
0.5040300091688483
