In [None]:
import numpy as np
import torch
from torch.utils.data import DataLoader
from yforseer.datasets import StockDataset, StockDiffDataset
from yforseer.trainers import StockTCNNTrainer
from yforseer.evaluate import evaluate_stock_trend_prediction
from yforseer.utils import convert_diff_to_prices
from tqdm import tqdm
import matplotlib.pyplot as plt
# Prefer the first CUDA GPU when PyTorch can see one; otherwise run on the CPU.
dev = "cuda:0" if torch.cuda.is_available() else 'cpu'
device = torch.device(dev)
print('Device %s is used ' % (device,))

# Train

In [None]:
# ---- Hyperparameters -------------------------------------------------------
batch_size = 32    # samples per mini-batch handed to the DataLoaders
epochs = 100       # number of passes of the training loop
lr = 0.001         # learning rate forwarded to the trainer
test_frac = 0.1    # trailing fraction of the day axis held out for testing
memory = 120       # `memory` argument of the dataset (input window)
lookahead = 10     # `lookahead` argument of the dataset (prediction horizon)
mode = 'all'       # dataset mode; with 'all' the model emits `lookahead` steps

# ---- Load data -------------------------------------------------------------
# The archive holds the price array under 'data' plus the scalars 'mu' and
# 'std', which are later used as `x * std + mu` to map values back to prices.
load_array_pth = 'data/yahoo/artifacts/data_array.npz'
loaded_data = np.load(load_array_pth)
mu = loaded_data['mu'].item()
std = loaded_data['std'].item()
data = torch.from_numpy(loaded_data['data']).float()
print(data.shape)


# Datasets and dataloaders
# Chronological hold-out along the day axis: the first `train_size` days are
# used for training and the remaining `test_size` days for testing.
num_stocks, num_days = data.shape
test_size = int(test_frac * num_days)
train_size = num_days - test_size
train_data, test_data = data[:, :train_size], data[:, train_size:]

# Both datasets are built with identical windowing arguments.
window_kwargs = dict(memory=memory, lookahead=lookahead, mode=mode)
train_dataset = StockDiffDataset(data=train_data, **window_kwargs)
test_dataset = StockDiffDataset(data=test_data, **window_kwargs)

# Only the training loader shuffles; the test loader keeps dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
print('train_dataset:', len(train_dataset))
print('test_dataset:', len(test_dataset))

# Train

# Model / trainer construction.
# NOTE: `mode` is intentionally NOT reassigned here. It is set with the other
# hyperparameters and was already used to build the datasets; re-declaring it
# in this section allowed `output_T` to drift from the dataset's mode whenever
# only one of the two assignments was edited.
num_channels = [512, 512, 256, num_stocks]  # last entry equals the number of stocks
output_T = lookahead if mode == 'all' else 1
trainer = StockTCNNTrainer(lr=lr, input_size=num_stocks, output_T=output_T,
                           num_channels=num_channels, kernel_size=6, dropout=0.1)
trainer.model.to(device)

# Per-epoch evaluation history (one entry appended per epoch, stacked at the end).
all_accs = []
top1_breturns, top3_breturns, all_breturns = [], [], []
top1_breturns_up, top3_breturns_up, all_breturns_up = [], [], []
all_test_loss, all_train_loss = [], []

for epoch in (bar := tqdm(range(epochs))):
    train_loss_list, test_loss_list = [], []

    # ---- Optimisation pass over the (shuffled) training windows ----
    trainer.model.train()
    for Xdiff_train, ydiff_train, _, _ in train_dataloader:
        train_loss, _ = trainer.train(Xdiff_train.to(device), ydiff_train.to(device))
        train_loss_list.append(train_loss)
    all_train_loss.append(np.mean(train_loss_list))

    # ---- Evaluation pass over the test windows ----
    trainer.model.eval()
    all_x_test = []
    all_y_pred = []
    all_y_test = []
    # No gradients are needed for evaluation; disabling autograd avoids building
    # a graph for every test batch.
    # NOTE(review): assumes `trainer.test` does not itself need autograd - confirm.
    with torch.no_grad():
        for Xdiff_test, ydiff_test, Xprices_test, yprice_test in test_dataloader:
            test_loss, y_pred = trainer.test(Xdiff_test.to(device), ydiff_test.to(device))
            test_loss_list.append(test_loss)

            # Convert predicted differences to prices: add the last observed
            # price to the first predicted difference, then accumulate along
            # the time axis.
            y_pred = y_pred.cpu().detach()
            y_pred[:, :, 0] = y_pred[:, :, 0] + Xprices_test[:, :, -1]
            yprice_pred_test = y_pred.cumsum(dim=2)

            # Store Xprices_test, yprice_test, yprice_pred_test as numpy arrays
            Xprices_test = Xprices_test.cpu().detach().numpy()  # (M, N, memory)
            yprice_test = yprice_test.cpu().detach().numpy()  # (M, N, lookahead)
            yprice_pred_test = yprice_pred_test.numpy()  # (M, N, lookahead)

            all_x_test.append(Xprices_test)
            all_y_test.append(yprice_test)
            all_y_pred.append(yprice_pred_test)

    all_test_loss.append(np.mean(test_loss_list))

    # Merge the batches and map the values back with the stored mu / std.
    all_x_test = np.concatenate(all_x_test, axis=0) * std + mu
    all_y_pred = np.concatenate(all_y_pred, axis=0) * std + mu
    all_y_test = np.concatenate(all_y_test, axis=0) * std + mu

    # Evaluate on the last time step only: last observed price vs. the
    # predicted and true price at the end of the lookahead window.
    (trend_acc, rise_acc, drop_acc), buy_returns, _ = evaluate_stock_trend_prediction(
        all_x_test[:, :, -1],
        all_y_pred[:, :, -1],
        all_y_test[:, :, -1],
    )
    all_accs.append(trend_acc)
    # Columns 0 / 1 / 2 of each `buy_returns` entry are stored under the
    # top-1 / top-3 / all names respectively.
    top1_breturns.append(buy_returns[0][:, 0])
    top3_breturns.append(buy_returns[0][:, 1])
    all_breturns.append(buy_returns[0][:, 2])
    top1_breturns_up.append(buy_returns[1][:, 0])
    top3_breturns_up.append(buy_returns[1][:, 1])
    all_breturns_up.append(buy_returns[1][:, 2])

    # Progress-bar summary (label typo 'test_lose' corrected to 'test_loss').
    bar.set_description(f'ACC={np.nanmedian(trend_acc):0.4f}, Breturns={np.nanmedian(buy_returns[0][:, 2]):0.4f}, test_loss={all_test_loss[-1]:0.4f}, train_loss={all_train_loss[-1]:0.4f}')

    # Log prediction: the file is overwritten every epoch, so it always holds
    # the most recent epoch's arrays.
    np.savez('predictions.npz', x_test=all_x_test, y_pred=all_y_pred, y_test=all_y_test)

# Stack the per-epoch lists into arrays whose leading axis is the epoch.
all_accs = np.stack(all_accs)
top1_breturns = np.stack(top1_breturns)
top3_breturns = np.stack(top3_breturns)
all_breturns = np.stack(all_breturns)
top1_breturns_up = np.stack(top1_breturns_up)
top3_breturns_up = np.stack(top3_breturns_up)
all_breturns_up = np.stack(all_breturns_up)


# Re-run inference on the train and test sets

In [None]:
batch_size = 32
# Both loaders are unshuffled here so the collected arrays follow dataset order.
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


def _predict_prices(trainer, dataloader, device):
    """Run the trained model over `dataloader` and collect price arrays.

    For every batch the predicted differences are turned into prices by adding
    the last observed price to the first predicted difference and accumulating
    along the time axis.

    Args:
        trainer: object exposing `test(X, y) -> (loss, y_pred)`.
        dataloader: yields `(X_diff, y_diff, X_prices, y_prices)` batches.
        device: device the difference tensors are moved to before the call.

    Returns:
        Tuple `(x_prices, y_prices, y_prices_pred)` of numpy arrays, each the
        concatenation of all batches along axis 0.
    """
    x_batches, y_batches, y_pred_batches = [], [], []
    # Inference only: skip autograd bookkeeping.
    # NOTE(review): assumes `trainer.test` does not itself need autograd - confirm.
    with torch.no_grad():
        for x_diff, y_diff, x_prices, y_prices in dataloader:
            # The loss returned by `trainer.test` is not needed here.
            _, y_pred = trainer.test(x_diff.to(device), y_diff.to(device))

            # Convert difference to prices
            y_pred = y_pred.cpu().detach()
            y_pred[:, :, 0] = y_pred[:, :, 0] + x_prices[:, :, -1]
            y_prices_pred = y_pred.cumsum(dim=2)

            x_batches.append(x_prices.cpu().detach().numpy())  # (M, N, memory)
            y_batches.append(y_prices.cpu().detach().numpy())  # (M, N, lookahead)
            y_pred_batches.append(y_prices_pred.numpy())  # (M, N, lookahead)

    return (
        np.concatenate(x_batches, axis=0),
        np.concatenate(y_batches, axis=0),
        np.concatenate(y_pred_batches, axis=0),
    )


trainer.model.eval()

X_train, y_train, y_pred_train = _predict_prices(trainer, train_dataloader, device)
X_test, y_test, y_pred_test = _predict_prices(trainer, test_dataloader, device)

# Map the values back with the stored mu / std.
# NOTE(review): the bare `.squeeze()` on the target/prediction arrays drops every
# size-1 axis, including a batch or stock axis of length 1 - kept as-is so
# downstream shapes are unchanged; confirm this is intended.
X_train = X_train * std + mu
y_train = y_train.squeeze() * std + mu
y_pred_train = y_pred_train.squeeze() * std + mu
X_test = X_test * std + mu
y_test = y_test.squeeze() * std + mu
y_pred_test = y_pred_test.squeeze() * std + mu
