# Import

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import timedelta
from tqdm import tqdm
import logging
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from src.normalize_split import inverse_minmax_scale


# Dataset

In [None]:
class KrankenDataSet(Dataset):
    """In-memory dataset of (input, target) tensor pairs read from one ``.npz`` archive.

    Both arrays are loaded eagerly and converted to ``torch.float`` tensors;
    samples are indexed along axis 0.

    Args:
        input_data_pth: Path of the ``.npz`` archive, passed to ``np.load``.
        xlabel: Name of the array inside the archive that holds the inputs.
        ylabel: Name of the array inside the archive that holds the targets.

    Raises:
        KeyError: If ``xlabel`` or ``ylabel`` is not stored in the archive.
        ValueError: If the two arrays disagree on the number of samples.
    """

    def __init__(self, input_data_pth, xlabel, ylabel):
        # np.load on an .npz returns a lazy NpzFile that keeps the file open;
        # the context manager closes it once both arrays are materialised.
        with np.load(input_data_pth) as data:
            self.X = torch.from_numpy(data[xlabel]).to(torch.float)
            self.y = torch.from_numpy(data[ylabel]).to(torch.float)

        # __len__ is derived from y alone, so a length mismatch would otherwise
        # surface later as an index error or as silently ignored inputs.
        if self.X.shape[0] != self.y.shape[0]:
            raise ValueError(
                f"'{xlabel}' has {self.X.shape[0]} samples but "
                f"'{ylabel}' has {self.y.shape[0]}"
            )

    def __len__(self):
        """Return the number of samples (size of axis 0 of the targets)."""
        return self.y.shape[0]

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair stored at position ``idx``."""
        return self.X[idx, ...], self.y[idx, ...]

# Model

In [None]:
class PriceNet(nn.Module):
    """Conv + self-attention network mapping (M, N, time) inputs to (M, N, 1) outputs.

    Each of the ``M * N`` series is first processed on its own by two
    Conv1d/BatchNorm/ReLU stages, each followed by self-attention, and then
    compressed to a 200-dimensional vector. A final attention layer mixes the
    ``N`` vectors of each sample before a small MLP emits one value per series.

    The attention embedding sizes (85, 40) equal the conv output lengths, so
    the layer configuration only fits inputs whose last axis has length 90
    (90 - 6 + 1 = 85, then (85 - 7) // 2 + 1 = 40).
    """

    def __init__(self):
        super().__init__()

        # Attribute names and creation order are kept as-is: they determine the
        # state_dict keys and the order in which parameters are initialised.
        self.conv_layer1 = self._conv_block(1, 50, kernel_size=6, stride=1)
        self.attn_layer1 = self._self_attention(embed_dim=85, num_heads=5)

        self.conv_layer2 = self._conv_block(50, 100, kernel_size=7, stride=2)
        self.attn_layer2 = self._self_attention(embed_dim=40, num_heads=4)

        # (M*N, 100, 40) -> (M*N, 4000) -> (M*N, 200)
        self.flatten_and_transform = nn.Sequential(
            nn.Flatten(1, 2),
            nn.Linear(in_features=4000, out_features=200),
        )

        self.attn_layer3 = self._self_attention(embed_dim=200, num_heads=5)

        self.linear_layer = nn.Sequential(
            nn.Linear(in_features=200, out_features=50),
            nn.ReLU(),
            nn.Dropout(p=0.2),
            nn.Linear(in_features=50, out_features=1),
        )

    @staticmethod
    def _conv_block(in_channels, out_channels, kernel_size, stride):
        """Build one Conv1d -> BatchNorm1d -> ReLU stage."""
        return nn.Sequential(
            nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
            ),
            nn.BatchNorm1d(num_features=out_channels),
            nn.ReLU(),
        )

    @staticmethod
    def _self_attention(embed_dim, num_heads):
        """Build a batch-first multi-head attention layer with 0.2 dropout."""
        return nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            dropout=0.2,
            batch_first=True,
        )

    def forward(self, X):
        """Return one value per series.

        Args:
            X: Tensor of shape (M, N, time).

        Returns:
            Tensor of shape (M, N, 1).
        """
        n_samples, n_series, _ = X.shape
        attn_opts = dict(need_weights=False, average_attn_weights=False)

        # Fold the series axis into the batch axis and add a channel axis.
        hidden = X.flatten(0, 1).unsqueeze(dim=1)  # (M*N, 1, time)

        hidden = self.conv_layer1(hidden)  # (M*N, 50, 85)
        hidden, _ = self.attn_layer1(hidden, hidden, hidden, **attn_opts)  # (M*N, 50, 85)

        hidden = self.conv_layer2(hidden)  # (M*N, 100, 40)
        hidden, _ = self.attn_layer2(hidden, hidden, hidden, **attn_opts)  # (M*N, 100, 40)

        hidden = self.flatten_and_transform(hidden)  # (M*N, 200)

        # Unfold the batch axis again so attention can run across the N series.
        hidden = hidden.reshape(n_samples, n_series, -1)  # (M, N, 200)
        hidden, _ = self.attn_layer3(hidden, hidden, hidden, **attn_opts)  # (M, N, 200)

        return self.linear_layer(hidden)  # (M, N, 1)
    


# Trainer

In [29]:
class CryptoTrainer:
    """Owns a ``PriceNet`` together with its MSE loss and Adam optimiser.

    Args:
        lr: Learning rate handed to ``torch.optim.Adam``.
        minmax_pth: Optional path of a CSV file with ``mins`` and ``maxs``
            columns. Only ``eval`` needs it (to undo the min-max scaling).
    """

    def __init__(self, lr, minmax_pth=None):
        self.lr = lr
        self.criterion = nn.MSELoss()
        self.model = PriceNet()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)

        self.mins = None
        self.maxs = None
        if minmax_pth:
            minmax_df = pd.read_csv(minmax_pth)
            # Stored as (1, n_rows, 1) arrays — presumably so that
            # inverse_minmax_scale can broadcast one min/max per series over
            # (batch, series, 1) inputs; confirm against that helper.
            self.mins = minmax_df['mins'].to_numpy().reshape(1, -1, 1)
            self.maxs = minmax_df['maxs'].to_numpy().reshape(1, -1, 1)

    def _run_in_eval_mode(self, fn, *args):
        """Call ``fn(*args)`` without gradients and with the model in eval mode.

        The training flag of every sub-module is restored afterwards, so a
        caller that is in the middle of training is not affected.
        """
        saved_flags = [(module, module.training) for module in self.model.modules()]
        self.model.eval()
        try:
            with torch.no_grad():
                return fn(*args)
        finally:
            for module, was_training in saved_flags:
                module.training = was_training

    def forward_pass(self, X, y):
        """Return the criterion loss (a tensor) of the model's prediction for ``X`` against ``y``."""
        y_pred = self.model(X)
        loss = self.criterion(y_pred, y)
        return loss

    def test(self, X_test, y_test):
        """Return the loss on ``(X_test, y_test)`` as a Python float.

        Runs in eval mode regardless of the model's current mode, so Dropout
        and BatchNorm cannot make the reported loss stochastic.
        """
        test_loss = self._run_in_eval_mode(self.forward_pass, X_test, y_test)
        return test_loss.item()

    def train(self, X_train, y_train):
        """Run one optimisation step on the batch and return its loss as a float.

        The model's train/eval mode is left as the caller set it.
        """
        train_loss = self.forward_pass(X_train, y_train)
        train_loss.backward()
        self.optimizer.step()
        # Cleared after the step so no gradient carries over into the next call.
        self.optimizer.zero_grad()
        return train_loss.item()

    def eval(self, X_test, y_test):
        """Score predictions in the original (un-scaled) units.

        The last time step of ``X_test`` is used as the reference value. The
        predicted and the real fractional change relative to it are computed
        per series, and the real change is summed with weights proportional
        to the absolute predicted change and signed by its direction.

        Args:
            X_test: Input tensor of shape (M, N, time).
            y_test: Target tensor; assumed (M, N, 1) to match the model
                output — confirm against the data.

        Returns:
            ``(aver_winrate, (x_test_ori, y_test_ori, y_pred_ori))`` where
            ``aver_winrate`` is the mean over samples of the weighted signed
            real change and the arrays are the un-scaled reference, target
            and prediction.

        Raises:
            AssertionError: If the trainer was built without ``minmax_pth``.
        """
        # Raised explicitly (same exception type as the former assert) so the
        # check is not stripped when Python runs with -O.
        if self.mins is None or self.maxs is None:
            raise AssertionError("eval() needs the min/max table: pass minmax_pth to CryptoTrainer")

        y_pred = self._run_in_eval_mode(self.model, X_test)

        def as_numpy(tensor):
            # detach/cpu are no-ops for plain CPU tensors but keep this
            # working for tensors that live on another device.
            return tensor.detach().cpu().numpy()

        x_test_ori = inverse_minmax_scale(as_numpy(X_test[:, :, [-1]]), self.mins, self.maxs)
        y_test_ori = inverse_minmax_scale(as_numpy(y_test), self.mins, self.maxs)
        y_pred_ori = inverse_minmax_scale(as_numpy(y_pred), self.mins, self.maxs)

        # Compute fraction change
        pred_frac = (y_pred_ori - x_test_ori) / x_test_ori
        real_frac = (y_test_ori - x_test_ori) / x_test_ori

        abs_pred = np.abs(pred_frac)
        total_abs = np.sum(abs_pred, axis=1).reshape(pred_frac.shape[0], 1, 1)
        # A sample whose predicted changes are all zero has total 0; dividing
        # by 1 instead gives it zero weights (and zero contribution), not NaN.
        safe_total = np.where(total_abs == 0, 1.0, total_abs)
        weighting = abs_pred / safe_total

        aver_winrate = np.mean(np.sum(np.sign(pred_frac) * real_frac * weighting, axis=1))
        return aver_winrate, (x_test_ori, y_test_ori, y_pred_ori)
        



In [30]:
# Both splits are stored in the same archive under different array names.
_SPLITS_NPZ = 'data/train_test_data.npz'

dataset_train = KrankenDataSet(_SPLITS_NPZ, xlabel='X_train', ylabel='y_train')
dataset_test = KrankenDataSet(_SPLITS_NPZ, xlabel='X_test', ylabel='y_test')

# Training: shuffled mini-batches of 32.
dataloader_train = DataLoader(dataset=dataset_train, batch_size=32, shuffle=True)
# Testing: the whole split is delivered as one batch.
dataloader_test = DataLoader(dataset=dataset_test, batch_size=len(dataset_test))




In [31]:
# Fixed seed so that weight initialisation, dropout and batch shuffling (all
# of which draw from torch's global RNG) repeat across kernel restarts.
SEED = 0
torch.manual_seed(SEED)

trainer = CryptoTrainer(lr=1e-4, minmax_pth='data/minmax.csv')

# One pass over the training split. Every batch loss is kept so convergence
# can be inspected afterwards, and shown on the bar instead of being printed
# (printing inside a tqdm loop breaks the bar up into one line per step).
train_losses = []
trainer.model.train()
train_bar = tqdm(dataloader_train, desc="train")
for X_train, y_train in train_bar:
    train_loss = trainer.train(X_train, y_train)
    train_losses.append(train_loss)
    train_bar.set_postfix(loss=f"{train_loss:.5f}")

# Loss on the held-out split, with dropout off and BatchNorm running stats.
test_losses = []
trainer.model.eval()
for X_test, y_test in tqdm(dataloader_test, desc="test"):
    test_loss = trainer.test(X_test, y_test)
    test_losses.append(test_loss)

  7%|▋         | 1/15 [00:16<03:57, 17.00s/it]

0.07124023139476776


 13%|█▎        | 2/15 [00:33<03:40, 16.94s/it]

0.047896623611450195


 20%|██        | 3/15 [00:49<03:18, 16.53s/it]

0.04241904616355896


 27%|██▋       | 4/15 [01:05<02:59, 16.34s/it]

0.032003115862607956


 33%|███▎      | 5/15 [01:22<02:42, 16.29s/it]

0.03154659643769264


 40%|████      | 6/15 [01:37<02:24, 16.10s/it]

0.03552369773387909


 47%|████▋     | 7/15 [01:53<02:08, 16.01s/it]

0.03413328155875206


 53%|█████▎    | 8/15 [02:09<01:51, 15.87s/it]

0.03411031514406204


 60%|██████    | 9/15 [02:24<01:34, 15.81s/it]

0.03136496990919113


 67%|██████▋   | 10/15 [02:40<01:18, 15.76s/it]

0.035777222365140915


 73%|███████▎  | 11/15 [02:56<01:02, 15.71s/it]

0.02812858298420906


 80%|████████  | 12/15 [03:11<00:47, 15.68s/it]

0.03479861095547676


 87%|████████▋ | 13/15 [03:27<00:31, 15.67s/it]

0.030077047646045685


 93%|█████████▎| 14/15 [03:43<00:15, 15.76s/it]

0.02979174815118313


100%|██████████| 15/15 [04:00<00:00, 16.00s/it]

0.02270025946199894





In [32]:
# Score with dropout disabled and BatchNorm using its running statistics.
trainer.model.eval()

# dataloader_test was built with batch_size=len(dataset_test), so its first
# batch is the complete test split.
X_test, y_test = next(iter(dataloader_test))

aver_winrate, unscaled = trainer.eval(X_test, y_test)
x_test_ori, y_test_ori, y_pred_ori = unscaled

In [33]:
def _fractional_change(value, reference):
    """Return the change of ``value`` relative to ``reference``, as a fraction of ``reference``."""
    return (value - reference) / reference


# Predicted vs. realised move, both relative to the last observed value.
pred_frac = _fractional_change(y_pred_ori, x_test_ori)
real_frac = _fractional_change(y_test_ori, x_test_ori)



0.021837632771131207

In [44]:
# CryptoTrainer.eval computes these weights internally but does not return
# them, so they are rebuilt here; without this the cell raises NameError on a
# fresh kernel. Each entry is that series' share of the sample's total
# absolute predicted change.
weighting = np.abs(pred_frac) / np.sum(np.abs(pred_frac), axis=1, keepdims=True)
weighting

array([[[0.00437483],
        [0.00507741],
        [0.00569763],
        ...,
        [0.00705542],
        [0.00042331],
        [0.00536689]],

       [[0.0044147 ],
        [0.00502358],
        [0.0057725 ],
        ...,
        [0.0069998 ],
        [0.0004448 ],
        [0.00597047]],

       [[0.00411723],
        [0.00502834],
        [0.00558712],
        ...,
        [0.0069154 ],
        [0.00020404],
        [0.00534146]],

       ...,

       [[0.00410947],
        [0.00466513],
        [0.00592065],
        ...,
        [0.00847674],
        [0.00101272],
        [0.00689551]],

       [[0.00403248],
        [0.00495512],
        [0.00585989],
        ...,
        [0.00850402],
        [0.0012607 ],
        [0.00707504]],

       [[0.00427081],
        [0.00497449],
        [0.00588298],
        ...,
        [0.00845039],
        [0.0020606 ],
        [0.00690689]]])