In [4]:
import transformers
import numpy as np
import pandas as pd
import torch
from torch import nn, Tensor
import math

In [5]:
class PositionalEncoder(nn.Module):
    """
    Adds fixed sinusoidal positional encodings to a batch-first input and
    applies dropout to the result.

    The encoding table is pre-computed once in ``__init__`` and stored as a
    non-trainable buffer named ``pe`` with shape [max_seq_len, 1, d_model].

    Adapted from: 
    https://pytorch.org/tutorials/beginner/transformer_tutorial.html
    https://github.com/LiamMaclean216/Pytorch-Transfomer/blob/master/utils.py 
    """

    def __init__(self, dropout: float = 0.1, max_seq_len: int = 5000, d_model: int = 512):

        """
        Args:
            dropout: the dropout rate applied after the encodings are added
            max_seq_len: the maximum length of the input sequences, i.e. the
                         number of positions in the pre-computed table
            d_model: The dimension of the output of sub-layers in the model 
                     (Vaswani et al, 2017). Both even and odd values are
                     supported.
        """

        super().__init__()

        self.d_model = d_model

        self.dropout = nn.Dropout(p=dropout)

        # Column vector of positions: [max_seq_len, 1]
        position = torch.arange(max_seq_len).unsqueeze(1)

        # One frequency per sin/cos pair: [ceil(d_model / 2)]
        exp_input = torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        div_term = torch.exp(exp_input)

        # Angle for every (position, frequency) combination:
        # [max_seq_len, ceil(d_model / 2)]
        angles = position * div_term

        pe = torch.zeros(max_seq_len, d_model)

        # Even columns hold the sine terms.
        pe[:, 0::2] = torch.sin(angles)

        # Odd columns hold the cosine terms. When d_model is odd there is one
        # fewer odd column than even column, so the last frequency is dropped
        # here; without the slice the assignment fails with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # [max_seq_len, d_model] -> [max_seq_len, 1, d_model]
        pe = pe.unsqueeze(1)

        # register that pe is not a model parameter
        self.register_buffer('pe', pe)

    def forward(self, x: Tensor) -> Tensor:
        """
        Args:
            x: Tensor, shape [batch_size, enc_seq_len, dim_val]

        Returns:
            Tensor with the same shape as x: x plus the encodings of the
            first enc_seq_len positions, passed through dropout.

        Raises:
            RuntimeError: if enc_seq_len exceeds the max_seq_len the table
                was built with.
        """

        seq_len = x.size(1)
        table_len = self.pe.size(0)

        # Fail with a clear message instead of an opaque broadcasting error
        # (same exception type torch raised for this case before).
        if seq_len > table_len:
            raise RuntimeError(
                "Input sequence length {} exceeds max_seq_len {} of the "
                "positional encoding table".format(seq_len, table_len)
            )

        # [seq_len, 1, d_model] -> [seq_len, d_model]; broadcasts over batch.
        add = self.pe[:seq_len].squeeze(1)

        x = x + add

        return self.dropout(x)

In [6]:
class TimeSeriesTransformer(nn.Module):
    """
    Encoder-decoder transformer for time series.

    Both the encoder input (src) and the decoder input (tgt) are projected
    from input_size features to dim_val with a linear layer. Positional
    encodings are added to the encoder input only. The decoder output is
    passed through a final linear layer.

    All tensors are batch-first ([batch_size, seq_len, features]), matching
    the layout PositionalEncoder.forward expects.
    """

    def __init__(
        self,
        input_size, 
        dec_seq_len,
        out_seq_len,
        max_seq_len,
        dim_val,
        n_encoder_layers = 4,
        n_decoder_layers = 4,
        dropout_encoder: float=0.2, 
        dropout_decoder: float=0.2,
        dropout_pos_enc: float=0.2,
        dim_feedforward_encoder: int=2048,
        dim_feedforward_decoder: int=2048,
        n_heads = 8,
    ):
        """
        Args:
            input_size: number of features per time step in src and tgt
            dec_seq_len: stored as self.dec_seq_len; not otherwise used here
            out_seq_len: number of outputs of the final linear layer
            max_seq_len: number of positions in the positional encoding table
            dim_val: model width (d_model) of the encoder and decoder layers
            n_encoder_layers: number of stacked encoder layers
            n_decoder_layers: number of stacked decoder layers
            dropout_encoder: dropout rate inside the encoder layers
            dropout_decoder: dropout rate inside the decoder layers
            dropout_pos_enc: dropout rate of the positional encoder
            dim_feedforward_encoder: hidden width of the encoder feed-forward
            dim_feedforward_decoder: hidden width of the decoder feed-forward
            n_heads: number of attention heads in every layer
        """

        super().__init__()
        self.dec_seq_len = dec_seq_len

        print("input_size is: {}".format(input_size))
        print("dim_val is: {}".format(dim_val))

        # Project raw features up to the model width.
        self.encoder_input_layers = nn.Linear(in_features=input_size, out_features=dim_val)
        self.decoder_input_layer = nn.Linear(in_features=input_size, out_features=dim_val)

        self.positional_encoder = PositionalEncoder(
            dropout=dropout_pos_enc,
            max_seq_len=max_seq_len,
            d_model=dim_val,
        )

        # batch_first=True is required: PositionalEncoder treats dim 1 as the
        # sequence axis, whereas the torch default (batch_first=False) would
        # make the attention layers treat dim 0 as the sequence axis and
        # attend across samples of the batch.
        encoder_layers = nn.TransformerEncoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dropout=dropout_encoder,
            dim_feedforward=dim_feedforward_encoder,
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer=encoder_layers, num_layers=n_encoder_layers)

        decoder_layer = nn.TransformerDecoderLayer(
            d_model=dim_val,
            nhead=n_heads,
            dropout=dropout_decoder,
            dim_feedforward=dim_feedforward_decoder,
            batch_first=True,
        )
        self.decoder = nn.TransformerDecoder(decoder_layer=decoder_layer, num_layers=n_decoder_layers)

        # NOTE(review): nn.Linear acts on the last dimension, and the decoder
        # output's last dimension is dim_val, so in_features only matches when
        # out_seq_len == 1. For out_seq_len > 1 forward() raises a shape
        # error unless the decoder output is flattened first -- confirm the
        # intended output layout before using larger values.
        self.decoder_linear_mapping = nn.Linear(in_features=out_seq_len * dim_val, out_features=out_seq_len)

    def forward(self, src, tgt, src_mask=None, tgt_mask=None):
        """
        Args:
            src: encoder input, shape [batch_size, src_len, input_size]
            tgt: decoder input, shape [batch_size, tgt_len, input_size]
            src_mask: optional attention mask passed to the decoder as
                memory_mask, shape [tgt_len, src_len]
            tgt_mask: optional attention mask for decoder self-attention,
                shape [tgt_len, tgt_len]

        Returns:
            Output of the final linear layer applied to the decoder output;
            shape [batch_size, tgt_len, out_seq_len] when out_seq_len == 1.
        """

        # Encoder: feature projection -> positional encoding -> encoder stack.
        memory = self.encoder_input_layers(src)
        memory = self.positional_encoder(memory)
        memory = self.encoder(src=memory)

        # Decoder: feature projection -> decoder stack attending to memory.
        # No positional encoding is applied to the decoder input.
        decoded = self.decoder_input_layer(tgt)
        decoded = self.decoder(
            tgt=decoded,
            memory=memory,
            tgt_mask=tgt_mask,
            memory_mask=src_mask,
        )

        decoder_output = self.decoder_linear_mapping(decoded)
        return decoder_output

In [None]:
# Hyperparameters for the time-series transformer.
input_size = 1 # The number of features we want to use
dec_seq_len = 30
enc_seq_len = 150

out_seq_len = 1
dim_val = 512

# The model adds positional encodings to the encoder input only, so the
# encoding table must cover at least enc_seq_len positions.
# NOTE(review): this line was a bare `max_seq_len` with no assignment in the
# visible cells, which raises NameError on a fresh kernel; enc_seq_len is the
# minimum value that works -- adjust if a larger table is intended.
max_seq_len = enc_seq_len

