In [None]:
import os
import sys

# Directory appended to the module search path so that locally checked-out
# packages can be imported (presumably where the `zenzic` package lives —
# confirm). Set the TRADING_REPO_DIR environment variable to override it on
# another machine; the default is the original hard-coded checkout location.
TRADING_REPO_DIR = os.environ.get(
    'TRADING_REPO_DIR',
    'C:\\Users\\xzhu\\Documents\\GitHub\\trading',
)
# Guard against growing sys.path by one entry every time this cell is re-run.
if TRADING_REPO_DIR not in sys.path:
    sys.path.append(TRADING_REPO_DIR)

import pandas as pd
import datetime
import math
from pytorch_forecasting import TimeSeriesDataSet

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger
import torch

from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
from pytorch_forecasting.data.encoders import TorchNormalizer
from madgrad import MADGRAD

In [None]:
# Enable IPython's autoreload extension in mode 2 so edited modules are
# re-imported before each cell runs (no kernel restart needed while iterating
# on the project code).
%load_ext autoreload
%autoreload 2

# Project-local data helpers; used below as `dp` to load prices, build date
# features and split the data.
import zenzic.strategies.pytorch.tft.data_processor as dp

In [None]:
# --- window sizes -----------------------------------------------------------
SEQ_LEN: int = 256   # history length; passed as max_encoder_length
PRED_LEN: int = 32   # forecast horizon; passed as max_prediction_length

# --- data loading -----------------------------------------------------------
START_DATE: str = '2010-01-01'  # start_date handed to dp.sp500_prices
BATCH_SIZE: int = 32            # batch size of the train and test dataloaders


In [None]:
# Load the price history from START_DATE onwards, derive the date features
# from it, and attach them to every price row with a left join on 'Date'.
# NOTE(review): presumably one row per (Symbol, Date) — confirm against
# dp.sp500_prices.
raw_prices = dp.sp500_prices(start_date=START_DATE)
date_features = dp.date_features(raw_prices)
prices = raw_prices.join(date_features, on='Date', how='left')

In [None]:
# Split the joined frame into a training and a test frame.
# NOTE(review): 0.7 is presumably the fraction assigned to training —
# confirm against dp.split_data.
train_test_frames = dp.split_data(prices, SEQ_LEN, PRED_LEN, 0.7)
df_train, df_test = train_test_frames


In [None]:
# Real-valued covariates declared as known for future time steps
# (time index and calendar columns).
known_future_reals = ['Date_idx', 'Day_of_year', 'Day_of_week', 'Month', 'Day']
# Real-valued covariates declared as unknown in the future; 'Close' is also
# the prediction target.
observed_only_reals = ['Open', 'High', 'Low', 'Close']

# Training dataset: one series per 'Symbol', indexed by 'Date_idx', with
# encoder windows of up to SEQ_LEN steps and a horizon of up to PRED_LEN steps.
train_data = TimeSeriesDataSet(
    df_train,
    time_idx='Date_idx',
    target='Close',
    group_ids=['Symbol'],
    max_encoder_length=SEQ_LEN,
    max_prediction_length=PRED_LEN,
    time_varying_known_reals=known_future_reals,
    time_varying_unknown_reals=observed_only_reals,
    target_normalizer=TorchNormalizer(),
    add_relative_time_idx=True,
    add_target_scales=False,
    add_encoder_length=False,
    allow_missing_timesteps=True,
)

# Test dataset built from the training dataset's configuration, applied to
# df_test.
test_data = TimeSeriesDataSet.from_dataset(
    train_data,
    df_test,
    predict=False,
    stop_randomization=True,
)

# Both loaders share batch size and worker settings; only `train` differs.
loader_options = dict(batch_size=BATCH_SIZE, num_workers=0)
train_dataloader = train_data.to_dataloader(train=True, **loader_options)
test_dataloader = test_data.to_dataloader(train=False, **loader_options)

In [None]:
# Configure the trainer and network used only by the learning-rate finder.
pl.seed_everything(42)
trainer = pl.Trainer(
    # Use one GPU when CUDA is available; otherwise fall back to the CPU
    # instead of failing at Trainer construction on GPU-less machines.
    gpus=1 if torch.cuda.is_available() else 0,
    # clipping gradients is a hyperparameter and important to prevent divergence
    # of the gradient for recurrent neural networks
    gradient_clip_val=0.1,
)


tft = TemporalFusionTransformer.from_dataset(
    train_data,
    # not meaningful for finding the learning rate but otherwise very important
    learning_rate=0.03,
    hidden_size=16,  # most important hyperparameter apart from learning rate
    # number of attention heads. Set to up to 4 for large datasets
    attention_head_size=1,
    dropout=0.1,  # between 0.1 and 0.3 are good values
    hidden_continuous_size=8,  # set to <= hidden_size
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    # reduce learning rate if no improvement in validation loss after x epochs
    reduce_on_plateau_patience=4,
    optimizer=MADGRAD
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

In [None]:
# Find a suitable learning rate by sweeping it from min_lr to max_lr.
# The dataloaders are passed with the plural `train_dataloaders` keyword, the
# same spelling `trainer.fit` uses in this notebook; the singular
# `train_dataloader` form is deprecated and rejected by newer Lightning
# releases.
res = trainer.tuner.lr_find(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=test_dataloader,
    max_lr=10.0,
    min_lr=1e-6,
)

# Report the suggested value and plot the loss-vs-learning-rate curve with
# the suggestion marked.
print(f"suggested learning rate: {res.suggestion()}")
fig = res.plot(show=True, suggest=True)
fig.show()

In [None]:
# configure network and trainer
early_stop_callback = EarlyStopping(monitor="val_loss", min_delta=1e-4, patience=10, verbose=False, mode="min")
lr_logger = LearningRateMonitor()  # log the learning rate
logger = TensorBoardLogger("lightning_logs")  # logging results to a tensorboard

trainer = pl.Trainer(
    max_epochs=30,
    gpus=1,
    weights_summary="top",
    gradient_clip_val=0.01,
    limit_train_batches=30,  # coment in for training, running valiation every 30 batches
    # fast_dev_run=True,  # comment in to check that networkor dataset has no serious bugs
    callbacks=[lr_logger, early_stop_callback],
    logger=logger,
)


tft = TemporalFusionTransformer.from_dataset(
    train_data,
    learning_rate=0.01,
    hidden_size=16,
    attention_head_size=1,
    dropout=0.1,
    hidden_continuous_size=8,
    output_size=7,  # 7 quantiles by default
    loss=QuantileLoss(),
    log_interval=10,  # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
    reduce_on_plateau_patience=4,
    optimizer=MADGRAD
)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

In [None]:
# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=test_dataloader,
)

In [None]:
import pickle

from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

# Hyperparameter ranges explored by the Optuna study.
search_space = dict(
    gradient_clip_val_range=(0.01, 1.0),
    hidden_size_range=(8, 128),
    hidden_continuous_size_range=(8, 128),
    attention_head_size_range=(1, 4),
    learning_rate_range=(0.001, 0.1),
    dropout_range=(0.1, 0.3),
)

# Run the study: 200 trials of up to 50 epochs each, every epoch limited to
# 30 training batches. The learning rate is tuned by Optuna rather than by
# the built-in learning-rate finder.
study = optimize_hyperparameters(
    train_dataloader,
    test_dataloader,
    model_path="optuna_test",
    n_trials=200,
    max_epochs=50,
    trainer_kwargs=dict(limit_train_batches=30),
    reduce_on_plateau_patience=4,
    use_learning_rate_finder=False,
    verbose=True,
    optimizer=MADGRAD,
    **search_space,
)

# Persist the study so the results can be inspected, or tuning resumed, later.
with open("test_study.pkl", "wb") as study_file:
    pickle.dump(study, study_file)

# Report the best hyperparameter combination found.
print(study.best_trial.params)