In [1]:
import numpy as np
import torch
import matplotlib.pyplot as plt
import dilated_causal_convolution_layer
import datasets



In [2]:
import math
from torch.utils.data import DataLoader

In [3]:
class TransformerTimeSeries(torch.nn.Module):
    """
    Convolutional-embedding Transformer that emits one scalar per time step.

    The original notebook describes this model as the implementation of the paper
    "Modelling Long- and Short-term Multi-dimensional Patterns in Predictive
    Maintenance with Accumulative Attention".

    Pipeline (see ``forward``):
        1. the observations ``y`` and the time covariate ``x`` are stacked into a
           2-channel signal and embedded by
           ``dilated_causal_convolution_layer.context_embedding``;
        2. a learned positional embedding, looked up with the integer value of
           ``x``, is added to that embedding;
        3. the sum is passed through a stack of ``torch.nn.TransformerEncoderLayer``
           modules together with the caller-supplied attention mask;
        4. a linear layer maps every time step to a single value.

    Args:
        d_model: width of the embedding fed to the transformer (default 256).
        kernel_size: width of the 1-D kernel handed to ``context_embedding``
            (default 9).
        max_positions: number of rows in the positional-embedding table; every
            value of ``x`` must cast to an integer in ``[0, max_positions)``
            (default 512).
        nhead: number of attention heads per layer (default 8).
        num_layers: number of stacked encoder layers (default 3).

    The defaults reproduce the previously hard-coded architecture, so
    ``TransformerTimeSeries()`` builds exactly the same model as before.

    Note: the attributes ``decode_layer`` / ``transformer_decoder`` hold *encoder*
    modules; the names are kept so existing checkpoints and callers keep working.
    """

    # forward() always stacks exactly two channels: the observation and the time covariate.
    _IN_CHANNELS = 2

    def __init__(self, d_model=256, kernel_size=9, max_positions=512, nhead=8, num_layers=3):
        super().__init__()
        # Sub-modules are created in the same order as before so that parameter
        # initialisation consumes the RNG identically.
        self.input_embedding = dilated_causal_convolution_layer.context_embedding(
            self._IN_CHANNELS, d_model, kernel_size
        )
        self.positional_embedding = torch.nn.Embedding(max_positions, d_model)

        self.decode_layer = torch.nn.TransformerEncoderLayer(d_model=d_model, nhead=nhead)
        self.transformer_decoder = torch.nn.TransformerEncoder(self.decode_layer, num_layers=num_layers)

        self.fc1 = torch.nn.Linear(d_model, 1)

    def forward(self,x,y,attention_masks):
        """
        Args:
            x: time covariate, indexed as (batch, n_time_points); it is also cast
               to ``long`` and used as the positional index.
            y: observed values, same shape as ``x``.
            attention_masks: mask forwarded as the second positional argument
               (``mask``) of ``torch.nn.TransformerEncoder``.

        Returns:
            Tensor of shape (batch, n_time_points, 1).
        """
        # Stack observation and covariate as channels: (batch, 2, n_time_points).
        z = torch.cat((y.unsqueeze(1), x.unsqueeze(1)), 1)

        # Per the original comment the embedding comes back as
        # (batch, d_model, seq_len); the transformer wants (seq_len, batch, d_model).
        z_embedding = self.input_embedding(z).permute(2, 0, 1)

        # Embedding lookup gives (batch, seq_len, d_model) -> (seq_len, batch, d_model).
        positional_embeddings = self.positional_embedding(x.long()).permute(1, 0, 2)

        input_embedding = z_embedding + positional_embeddings

        transformer_embedding = self.transformer_decoder(input_embedding, attention_masks)

        # Back to batch-first before projecting every step to one value.
        output = self.fc1(transformer_embedding.permute(1, 0, 2))

        return output
        

In [4]:
# Reproducibility: seed the NumPy and PyTorch global RNGs before any data is
# built, any weight is initialised or any loader shuffles.
# NOTE(review): this only helps if `datasets` draws from these global RNGs - confirm.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Index of the first forecast step: it is passed to the dataset builder and to
# the train/eval/test helpers, which score predictions against y[:, t0:].
t0 = 24

In [5]:
# Build the three splits in one pass (train, validation, test - in that order).
# Judging by the recorded output below, the second argument is the number of
# series in the split (4500 / 500 / 1000, each with 48 time points).
train_dataset, validation_dataset, test_dataset = (
    datasets.time_series_prepocess(t0, n_series)
    for n_series in (4500, 500, 1000)
)

x: 4500*48 fx: 4500*48
x: 500*48 fx: 500*48
x: 1000*48 fx: 1000*48


In [6]:
# Mean-squared error averaged over all elements (the library default, spelled out).
criterion = torch.nn.MSELoss(reduction="mean")

In [7]:
# Mini-batch loaders. Only the training split is reshuffled each epoch; the
# validation and test splits keep their order (shuffle=False is the default).
train_dl = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)
validation_dl = DataLoader(
    dataset=validation_dataset,
    batch_size=64,
    shuffle=False,
)
test_dl = DataLoader(
    dataset=test_dataset,
    batch_size=128,
    shuffle=False,
)

In [8]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Instantiate the network with its default architecture and move it to `device`.
model = TransformerTimeSeries()
model = model.to(device)

In [9]:
# Optimisation settings.
epochs = 100  # number of full passes over the training loader
lr = 5e-4     # Adam step size
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [10]:
def Dp(y_pred,y_true,q):
    """Quantile-loss term for a single prediction.

    With ``d = y_pred - y_true`` this returns ``q * d`` when ``d >= 0`` and
    ``(q - 1) * d`` when ``d < 0``, i.e. the larger of the two products.
    """
    error = y_pred - y_true
    over_penalty = q * error
    under_penalty = (q - 1) * error
    return max(over_penalty, under_penalty)


In [11]:
def Rp_num_den(y_preds,y_trues,q):
    """Return the two sums from which the R_p metric of one series is built.

    numerator:   sum of ``Dp(y_pred, y_true, q)`` over paired predictions/targets
    denominator: sum of ``|y_true|`` over the targets
    """
    quantile_terms = []
    for y_pred, y_true in zip(y_preds, y_trues):
        quantile_terms.append(Dp(y_pred, y_true, q))
    numerator = np.sum(quantile_terms)

    magnitudes = list(map(np.abs, y_trues))
    denominator = np.sum(magnitudes)

    return numerator, denominator

In [12]:
def train_epoch(model,train_dl,t0=96,horizon=24):
    """Run one optimisation pass over ``train_dl`` and return the mean loss.

    Uses the notebook-level globals ``optimizer``, ``criterion`` and ``device``.

    Args:
        model: network called as ``model(x, y, mask)``, returning a tensor with
            a trailing size-1 axis (one value per time step).
        train_dl: iterable of ``(x, y, attention_masks)`` batches.
        t0: index of the first forecast step.
        horizon: number of forecast steps scored (default 24, the value that
            used to be hard-coded).

    Returns:
        Sample-weighted average of the per-batch loss.
    """
    model.train()
    train_loss = 0
    n = 0
    for x, y, attention_masks in train_dl:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        # Only the first mask of the batch is used - presumably every sample
        # carries the same mask; verify against the dataset.
        output = model(x, y, attention_masks[0].to(device))

        # squeeze(-1) removes only the trailing feature axis. A bare squeeze()
        # would also drop the batch axis for a batch of one sample and make the
        # 2-D slice below fail.
        # The output at step t is scored against y at step t+1, hence the shift by one.
        predictions = output.squeeze(-1)[:, (t0-1):(t0+horizon-1)]
        targets = y[:, t0:(t0+horizon)]

        loss = criterion(predictions, targets)
        loss.backward()
        optimizer.step()

        batch_size = x.shape[0]
        train_loss += loss.item() * batch_size
        n += batch_size
    return train_loss/n

In [13]:
def eval_epoch(model,validation_dl,t0=96,horizon=24):
    """Compute the mean loss over ``validation_dl`` without updating the model.

    Uses the notebook-level globals ``criterion`` and ``device``.

    Args:
        model: network called as ``model(x, y, mask)``, returning a tensor with
            a trailing size-1 axis (one value per time step).
        validation_dl: iterable of ``(x, y, attention_masks)`` batches.
        t0: index of the first forecast step.
        horizon: number of forecast steps scored (default 24, the value that
            used to be hard-coded).

    Returns:
        Sample-weighted average of the per-batch loss.
    """
    model.eval()
    eval_loss = 0
    n = 0
    with torch.no_grad():
        for x, y, attention_masks in validation_dl:
            x = x.to(device)
            y = y.to(device)

            # Only the first mask of the batch is used - presumably every
            # sample carries the same mask; verify against the dataset.
            output = model(x, y, attention_masks[0].to(device))

            # squeeze(-1) removes only the trailing feature axis, so a batch of
            # one sample keeps its batch dimension (a bare squeeze() would not).
            # The output at step t is scored against y at step t+1.
            predictions = output.squeeze(-1)[:, (t0-1):(t0+horizon-1)]
            targets = y[:, t0:(t0+horizon)]
            loss = criterion(predictions, targets)

            batch_size = x.shape[0]
            eval_loss += loss.item() * batch_size
            n += batch_size

    return eval_loss/n

In [14]:
def test_epoch(model,test_dl,t0=96):
    with torch.no_grad():
        predictions = []
        observations = []

        model.eval()
        for step,(x,y,attention_masks) in enumerate(test_dl):
            output = model(x.to(device),y.to(device),attention_masks[0].to(device))

            for p,o in zip(output.squeeze()[:,(t0-1):(t0+24-1)].cpu().numpy().tolist(),y.to(device)[:,t0:].cpu().numpy().tolist()):
           

                predictions.append(p)
                observations.append(o)

        num = 0
        den = 0
        for y_preds,y_trues in zip(predictions,observations):
            num_i,den_i = Rp_num_den(y_preds,y_trues,.5)
            num+=num_i
            den+=den_i
        Rp = (2*num)/den
        
    return Rp

In [15]:
# Per-epoch loss histories (one entry per epoch).
train_epoch_loss = []
eval_epoch_loss = []

# Start at +inf so the first epoch always becomes the running best; a finite
# sentinel would be reported as the "best" value if no epoch ever beat it.
Rp_best = float("inf")

for e in range(epochs):
    l_t = train_epoch(model,train_dl,t0)
    l_e = eval_epoch(model,validation_dl,t0)
    Rp = test_epoch(model,test_dl,t0)

    # NOTE(review): Rp_best is the minimum over epochs of a metric measured on
    # the *test* loader, i.e. the epoch is selected on test data. Treat it as an
    # optimistic figure; select on the validation loss for an unbiased estimate.
    Rp_best = min(Rp_best, Rp)

    train_epoch_loss.append(l_t)
    eval_epoch_loss.append(l_e)

    print("Epoch {}: Train loss: {} \t Validation loss: {} \t R_p={}".format(e, l_t, l_e, Rp))



Epoch 0: Train loss: 3797.3092706163193 	 Validation loss: 2670.141025390625 	 R_p=0.5997030672444467
Epoch 1: Train loss: 1854.2861497395834 	 Validation loss: 1287.182890625 	 R_p=0.407403951472577
Epoch 2: Train loss: 872.4550033365886 	 Validation loss: 477.61826904296873 	 R_p=0.2032849813286514
Epoch 3: Train loss: 292.14651820203994 	 Validation loss: 174.13368725585937 	 R_p=0.10485630540918982
Epoch 4: Train loss: 115.94602427842882 	 Validation loss: 72.79549835205079 	 R_p=0.06477228327333508
Epoch 5: Train loss: 51.66915262179904 	 Validation loss: 34.754987777709964 	 R_p=0.0459488342944672
Epoch 6: Train loss: 25.550670500013563 	 Validation loss: 17.032090919494628 	 R_p=0.02924069037860087
Epoch 7: Train loss: 14.261297188652886 	 Validation loss: 9.943371826171875 	 R_p=0.02357813839868287
Epoch 8: Train loss: 9.171848890516493 	 Validation loss: 7.471519798278808 	 R_p=0.024025369055495963
Epoch 9: Train loss: 6.270962770673964 	 Validation loss: 8.810970008850097 	 R

Epoch 79: Train loss: 1.3752249983681573 	 Validation loss: 1.6489824514389038 	 R_p=0.014327236120659712
Epoch 80: Train loss: 1.396236649831136 	 Validation loss: 2.507786087036133 	 R_p=0.017794621132593117
Epoch 81: Train loss: 1.418894773695204 	 Validation loss: 1.6211523199081421 	 R_p=0.014150509906639715
Epoch 82: Train loss: 1.3657550314797295 	 Validation loss: 1.7152012615203858 	 R_p=0.014445598937655818
Epoch 83: Train loss: 1.3558725352817111 	 Validation loss: 2.6196584186553955 	 R_p=0.017899608062739644
Epoch 84: Train loss: 1.40549073515998 	 Validation loss: 1.3496475772857666 	 R_p=0.012811915046972214
Epoch 85: Train loss: 1.4001316346062553 	 Validation loss: 1.3212452440261842 	 R_p=0.012697500766598032
Epoch 86: Train loss: 1.3466638241873847 	 Validation loss: 1.6677100067138673 	 R_p=0.014353269460440412
Epoch 87: Train loss: 1.3485321091545952 	 Validation loss: 1.7443421087265014 	 R_p=0.014586439696879306
Epoch 88: Train loss: 1.3473142716089885 	 Validati

In [16]:
# Report the running minimum of R_p kept in Rp_best by the training loop.
print("Rp best={}".format(Rp_best))

Rp best=0.01205371967563964
