In [2]:
import numpy as np
import pandas as pd

import os.path as op
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"

import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split

torch.manual_seed(42)

IMAGE_WIDTH = {5: 15, 20: 60, 60: 180}
IMAGE_HEIGHT = {5: 32, 20: 64, 60: 96}  

## load data

### have a look first

In [3]:
year = 2017
images = np.memmap(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_images.dat"), dtype=np.uint8, mode='r').reshape(
                        (-1, IMAGE_HEIGHT[20], IMAGE_WIDTH[20]))

In [52]:
print(images.shape)
images[0]

(67858, 64, 60)


memmap([[  0,   0,   0, ...,   0,   0,   0],
        [  0,   0,   0, ...,   0,   0,   0],
        [  0,   0,   0, ...,   0,   0,   0],
        ...,
        [  0, 255,   0, ...,   0,   0,   0],
        [  0, 255,   0, ...,   0,   0,   0],
        [  0, 255,   0, ...,   0, 255,   0]], dtype=uint8)

In [53]:
label_df = pd.read_feather(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_labels_w_delay.feather"))
assert(len(label_df) == len(images))
label_df.head()

Unnamed: 0,Date,StockID,MarketCap,Ret_5d,Ret_20d,Ret_60d,Ret_month,EWMA_vol
0,2017-01-31,10001,133078.0,4.37039e-07,-2e-06,-0.005954,-2e-06,0.00045
1,2017-02-28,10001,133078.0,0.003951997,0.002795,0.009953,0.009953,0.00018
2,2017-03-31,10001,133604.0,-0.007874612,-0.015749,0.021723,-0.015749,6.4e-05
3,2017-04-28,10001,131500.0,0.00999988,0.016001,0.038072,0.016001,3e-05
4,2017-05-31,10001,133604.0,4.37039e-07,0.021722,,0.023703,1.5e-05


### load

In [4]:
year_list = np.arange(1993,2001,1)

In [5]:
images = []
label_df = []
for year in year_list:
    images.append(np.memmap(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_images.dat"), dtype=np.uint8, mode='r').reshape(
                        (-1, IMAGE_HEIGHT[20], IMAGE_WIDTH[20])))
    label_df.append(pd.read_feather(op.join("./monthly_20d", f"20d_month_has_vb_[20]_ma_{year}_labels_w_delay.feather")))
    
images = np.concatenate(images)
label_df = pd.concat(label_df)

print(images.shape)
print(label_df.shape)

(793019, 64, 60)
(793019, 8)


## build dataset

In [6]:
class MyDataset(Dataset):
    
    def __init__(self, img, label):
        self.img = torch.Tensor(img.copy())
        self.label = torch.Tensor(label)
        self.len = len(img)
  
    def __len__(self):
        return self.len

    def __getitem__(self, idx):
        return self.img[idx], self.label[idx]

In [7]:
dataset = MyDataset(images, (label_df.Ret_20d > 0).values)

In [8]:
train_val_ratio = 0.7
train_dataset, val_dataset = random_split(dataset, [int(dataset.len*train_val_ratio), dataset.len-int(dataset.len*train_val_ratio)], \
                                           generator=torch.Generator().manual_seed(42))

In [9]:
train_dataloader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=256, shuffle=False)

## show the shape layer by layer

In [11]:
# padding = ((kernel_size-1) * stride * dilation) / 2

x = torch.Tensor(images[0].copy())
print(x.shape)
x = x.reshape(-1,1,64,60)
x = nn.Conv2d(1,64,kernel_size=(5,3),stride=(3,1),dilation=(2,1),padding=(12,1))(x)
print(x.shape)
x = nn.MaxPool2d((2, 1), stride=(2, 1))(x)
print(x.shape)
x = nn.Conv2d(64,128,kernel_size=(5,3),stride=(3,1),dilation=(2,1),padding=(12,1))(x)
print(x.shape)
x = nn.MaxPool2d((2, 1), stride=(2, 1))(x)
print(x.shape)
x = nn.Conv2d(128,256,kernel_size=(5,3),stride=(3,1),dilation=(2,1),padding=(12,1))(x)
print(x.shape)
x = nn.MaxPool2d((2, 1), stride=(2, 1))(x)
print(x.shape)
x = x.flatten()
print(x.shape)
x = nn.Linear(46080, 2)(x)
print(x.shape)

torch.Size([64, 60])
torch.Size([1, 64, 27, 60])
torch.Size([1, 64, 13, 60])
torch.Size([1, 128, 10, 60])
torch.Size([1, 128, 5, 60])
torch.Size([1, 256, 7, 60])
torch.Size([1, 256, 3, 60])
torch.Size([46080])
torch.Size([2])


# the neural network

In [10]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(128),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(256),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(46080, 2),
        )
        self.softmax = nn.Softmax(dim=1)
       
    def forward(self, x):
        x = x.reshape(-1,1,64,60)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(-1,46080)
        x = self.fc1(x)
        x = self.softmax(x)
        return x

In [11]:
def init_weights(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
        m.bias.data.fill_(0.)
    elif isinstance(m, nn.Conv2d):
        torch.nn.init.xavier_uniform_(m.weight)

In [12]:
net = Net().cuda()
net.apply(init_weights)
net = nn.DataParallel(net)

### have a look

In [63]:
net.eval()

DataParallel(
  (module): Net(
    (layer1): Sequential(
      (0): Conv2d(1, 64, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01, inplace=True)
      (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (layer2): Sequential(
      (0): Conv2d(64, 128, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): LeakyReLU(negative_slope=0.01, inplace=True)
      (3): MaxPool2d(kernel_size=(2, 1), stride=(2, 1), padding=0, dilation=1, ceil_mode=False)
    )
    (layer3): Sequential(
      (0): Conv2d(128, 256, kernel_size=(5, 3), stride=(3, 1), padding=(12, 1), dilation=(2, 1))
      (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): Le

In [64]:
count = 0
for name, parameters in net.named_parameters():
    print(name, ':', parameters.size())
    count += parameters.numel()
count

module.layer1.0.weight : torch.Size([64, 1, 5, 3])
module.layer1.0.bias : torch.Size([64])
module.layer1.1.weight : torch.Size([64])
module.layer1.1.bias : torch.Size([64])
module.layer2.0.weight : torch.Size([128, 64, 5, 3])
module.layer2.0.bias : torch.Size([128])
module.layer2.1.weight : torch.Size([128])
module.layer2.1.bias : torch.Size([128])
module.layer3.0.weight : torch.Size([256, 128, 5, 3])
module.layer3.0.bias : torch.Size([256])
module.layer3.1.weight : torch.Size([256])
module.layer3.1.bias : torch.Size([256])
module.fc1.1.weight : torch.Size([2, 46080])
module.fc1.1.bias : torch.Size([2])


708866

### simple test

In [16]:
X,y = next(iter(train_dataloader))

In [17]:
y_pred = net(X.to('cuda'))
y_pred

tensor([[0.0664, 0.9336],
        [0.0045, 0.9955],
        [0.0060, 0.9940],
        ...,
        [0.1372, 0.8628],
        [0.0495, 0.9505],
        [0.0120, 0.9880]], device='cuda:0', grad_fn=<GatherBackward>)

In [18]:
nn.CrossEntropyLoss()(y_pred, y.to('cuda').long())

tensor(0.8068, device='cuda:0', grad_fn=<NllLossBackward0>)

## train

In [15]:
def train_loop(dataloader, net, loss_fn, optimizer):
    
    size = len(dataloader.dataset)
    running_loss = 0.0
    total_loss = 0.0
    current = 0
    net.train()
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to('cuda')
        y = y.to('cuda')
        y_pred = net(X)
        loss = loss_fn(y_pred, y.long())
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        total_loss = (len(X) * running_loss + total_loss * current) / (len(X) + current)
        current += len(X)
        if batch % 500 == 499:
            print(f"batch: {batch+1} loss: {running_loss:>7f}  [{current:>5d}/{size:>5d}]")
        running_loss = 0.0
            
    return total_loss

In [16]:
def eval_loop(dataloader, net, loss_fn):
    
    size = len(dataloader.dataset)
    running_loss = 0.0
    total_loss = 0.0
    current = 0
    net.eval()
    
    for batch, (X, y) in enumerate(dataloader):
        X = X.to('cuda')
        y = y.to('cuda')
        y_pred = net(X)
        loss = loss_fn(y_pred, y.long())

        running_loss += loss.item()
        
        total_loss = (len(X) * running_loss + total_loss * current) / (len(X) + current)
        current += len(X)
        running_loss = 0.0
            
    return total_loss

In [31]:
# net = torch.load('/home/clidg/proj_2/pt/baseline_epoch_10_train_0.6865865240322523_eval_0.686580_.pt')

In [17]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=1e-5)

start_epoch = 0
min_val_loss = 1e9
last_min_ind = -1
early_stopping_epoch = 5

epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader, net, loss_fn, optimizer)
    eval_loss = eval_loop(val_dataloader, net, loss_fn)
    torch.save(net, 'pt/'+'baseline_epoch_{}_train_{:5f}_eval_{:5f}_.pt'.format(t+1, train_loss, eval_loss)) 
    if eval_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = eval_loss
    elif t - last_min_ind >= early_stopping_epoch:
        break

print("Done!")

Epoch 1
-------------------------------
batch: 500 loss: 0.743383  [64000/555113]
batch: 1000 loss: 0.761027  [128000/555113]
batch: 1500 loss: 0.749299  [192000/555113]
batch: 2000 loss: 0.753390  [256000/555113]
batch: 2500 loss: 0.767622  [320000/555113]
batch: 3000 loss: 0.751884  [384000/555113]
batch: 3500 loss: 0.716507  [448000/555113]
batch: 4000 loss: 0.706796  [512000/555113]
Epoch 2
-------------------------------
batch: 500 loss: 0.729995  [64000/555113]
batch: 1000 loss: 0.712267  [128000/555113]
batch: 1500 loss: 0.701640  [192000/555113]
batch: 2000 loss: 0.702505  [256000/555113]
batch: 2500 loss: 0.721700  [320000/555113]
batch: 3000 loss: 0.723054  [384000/555113]
batch: 3500 loss: 0.691526  [448000/555113]
batch: 4000 loss: 0.733267  [512000/555113]
Epoch 3
-------------------------------
batch: 500 loss: 0.711153  [64000/555113]
batch: 1000 loss: 0.717331  [128000/555113]
batch: 1500 loss: 0.688529  [192000/555113]
batch: 2000 loss: 0.732272  [256000/555113]
batch:

In [9]:
class Net_gelu(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(64),
            nn.GELU(),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(128),
            nn.GELU(),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.BatchNorm2d(256),
            nn.GELU(),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(46080, 2),
        )
        self.softmax = nn.Softmax(dim=1)
       
    def forward(self, x):
        x = x.reshape(-1,1,64,60)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(-1,46080)
        x = self.fc1(x)
        x = self.softmax(x)
        return x

In [10]:
net_gelu = Net_gelu().cuda()
net_gelu = nn.DataParallel(net_gelu)

In [17]:
net_gelu = torch.load('pt/gelu_epoch_16_train_0.6794522283735498_eval_0.686493_.pt')

In [18]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net_gelu.parameters(), lr=1e-5)

start_epoch = 16
min_val_loss = 1e9
last_min_ind = -1
early_stopping_epoch = 5

epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader, net_gelu, loss_fn, optimizer)
    eval_loss = eval_loop(val_dataloader, net_gelu, loss_fn)
    torch.save(net_gelu, 'pt/'+'gelu_epoch_{}_train_{}_eval_{:5f}_.pt'.format(t+1, train_loss, eval_loss)) 
    if eval_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = eval_loss
    elif t - last_min_ind >= early_stopping_epoch:
        break

print("Done!")

Epoch 17
-------------------------------
batch: 50 loss: 0.676524  [51200/555113]
batch: 100 loss: 0.677482  [102400/555113]
batch: 150 loss: 0.691183  [153600/555113]
batch: 200 loss: 0.678448  [204800/555113]
batch: 250 loss: 0.677435  [256000/555113]
batch: 300 loss: 0.687906  [307200/555113]
batch: 350 loss: 0.681285  [358400/555113]
batch: 400 loss: 0.683288  [409600/555113]
batch: 450 loss: 0.681180  [460800/555113]
batch: 500 loss: 0.689109  [512000/555113]
Epoch 18
-------------------------------
batch: 50 loss: 0.683059  [51200/555113]
batch: 100 loss: 0.678482  [102400/555113]
batch: 150 loss: 0.673867  [153600/555113]
batch: 200 loss: 0.686086  [204800/555113]
batch: 250 loss: 0.683032  [256000/555113]
batch: 300 loss: 0.679130  [307200/555113]
batch: 350 loss: 0.678020  [358400/555113]
batch: 400 loss: 0.675354  [409600/555113]
batch: 450 loss: 0.671298  [460800/555113]
batch: 500 loss: 0.676521  [512000/555113]
Epoch 19
-------------------------------
batch: 50 loss: 0.673

In [28]:
class Net_ln(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.LayerNorm([64, 27, 60]),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(64, 128, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.LayerNorm([128, 10, 60]),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.layer3 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=(5,3), stride=(3,1), dilation=(2,1), padding=(12,1)),
            nn.LayerNorm([256, 7, 60]),
            nn.LeakyReLU(negative_slope=0.01, inplace=True),
            nn.MaxPool2d((2, 1), stride=(2, 1)),
        )
        self.fc1 = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(46080, 2),
        )
        self.softmax = nn.Softmax(dim=1)
       
    def forward(self, x):
        x = x.reshape(-1,1,64,60)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.reshape(-1,46080)
        x = self.fc1(x)
        x = self.softmax(x)
        return x

In [29]:
net_ln = Net_ln().cuda()
net_ln = nn.DataParallel(net_ln)

In [None]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net_ln.parameters(), lr=1e-5)

start_epoch = 0
min_val_loss = 1e9
last_min_ind = -1
early_stopping_epoch = 5

epochs = 100
for t in range(start_epoch, epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loss = train_loop(train_dataloader, net_ln, loss_fn, optimizer)
    eval_loss = eval_loop(val_dataloader, net_ln, loss_fn)
    torch.save(net_ln, 'pt/'+'ln_epoch_{}_train_{}_eval_{:5f}_.pt'.format(t+1, train_loss, eval_loss)) 
    if eval_loss < min_val_loss:
        last_min_ind = t
        min_val_loss = eval_loss
    elif t - last_min_ind >= early_stopping_epoch:
        break

print("Done!")

Epoch 1
-------------------------------
batch: 50 loss: 0.710754  [51200/555113]
batch: 100 loss: 0.708510  [102400/555113]
batch: 150 loss: 0.705727  [153600/555113]
batch: 200 loss: 0.711209  [204800/555113]
batch: 250 loss: 0.701622  [256000/555113]
batch: 300 loss: 0.706289  [307200/555113]
batch: 350 loss: 0.694266  [358400/555113]
batch: 400 loss: 0.698518  [409600/555113]
batch: 450 loss: 0.700199  [460800/555113]
batch: 500 loss: 0.695487  [512000/555113]
Epoch 2
-------------------------------
batch: 50 loss: 0.703568  [51200/555113]
batch: 100 loss: 0.694036  [102400/555113]
batch: 150 loss: 0.693724  [153600/555113]
batch: 200 loss: 0.693416  [204800/555113]
batch: 250 loss: 0.693527  [256000/555113]
batch: 300 loss: 0.698224  [307200/555113]
batch: 350 loss: 0.685872  [358400/555113]
batch: 400 loss: 0.694834  [409600/555113]
batch: 450 loss: 0.696629  [460800/555113]
batch: 500 loss: 0.693313  [512000/555113]
Epoch 3
-------------------------------
batch: 50 loss: 0.695878