<a href="https://colab.research.google.com/github/vishal-burman/PyTorch-Architectures/blob/master/misc/Transformer_TimeSeries.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# ! pip install wget
# ! pip install datasets
! git clone https://github.com/vishal-burman/PyTorch-Architectures.git # --> QOL helper methods
%cd PyTorch-Architectures/

fatal: destination path 'PyTorch-Architectures' already exists and is not an empty directory.
/content/PyTorch-Architectures


In [2]:
from tqdm.auto import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from toolkit.utils import get_optimal_batchsize, dict_to_device, EarlyStopping
from toolkit.utils import get_linear_schedule_with_warmup

In [3]:
class CustomDataset(Dataset):
  def __init__(self, train=True):
    self.root_url = "https://raw.githubusercontent.com/hfawaz/cd-diagram/master/FordA/"
    self.url = self.root_url + ("FordA_TRAIN.tsv" if train else "FordA_TEST.tsv")
    self.data = np.loadtxt(self.url, delimiter="\t")
    self.x = self.data[:, 1:]
    self.y = self.data[:, 0].astype(int)
    self.y[self.y == -1] = 0
  
  def __len__(self,):
    return len(self.data)
  
  def __getitem__(self, idx):
    inputs = self.x[idx]
    labels = self.y[idx]
    return (inputs, labels)

  def collate_fn(self, batch):
    inputs = []
    labels = []
    for sample in batch:
      inputs.append(sample[0])
      labels.append(sample[1])
    inputs = torch.tensor(inputs)
    labels = torch.tensor(labels, dtype=torch.long)
    return {
        'inputs': inputs[:, :, None],
        'labels': labels[:, None, None],
    }

In [4]:
class Encoder(nn.Module):
  def __init__(self, dim, num_heads, dropout=0.):
    super().__init__()
    self.layernorm = nn.LayerNorm(dim, eps=1e-6)
    self.attention = nn.MultiheadAttention(embed_dim=dim, 
                                           num_heads=num_heads,
                                           dropout=dropout,
                                           batch_first=True)
    self.dropout = nn.Dropout(dropout)

    self.lin_1 = nn.Linear(dim, dim * 2)
    self.act = nn.ReLU(inplace=True)
    self.lin_2 = nn.Linear(dim * 2, dim)
  
  def forward(self, x):
    x_orig = x
    x = self.layernorm(x)
    x = self.attention(query=x, key=x, value=x)[0]
    x = self.dropout(x)
    res = x + x_orig

    x = self.layernorm(res)
    x = self.lin_1(x)
    x = self.act(x)
    x = self.dropout(x)
    x = self.lin_2(x)
    return (x + res)

In [5]:
class TransformerTimeSeries(nn.Module):
  def __init__(self, dim, mlp_dim, num_heads, num_classes, depth=4, dropout=0.):
    super().__init__()
    self.inputs_to_embeds = nn.Linear(1, dim)
    self.depth = depth
    self.dropout = nn.Dropout(dropout)

    self.encoder = nn.ModuleList([])
    for _ in range(depth):
      self.encoder.append(Encoder(dim=dim, num_heads=num_heads, dropout=dropout))
    self.pooler = nn.AvgPool1d(kernel_size=dim)

    self.mlp_proj = nn.Linear(500, mlp_dim)
    self.act = nn.ReLU(inplace=True)
    self.logits_proj = nn.Linear(mlp_dim, num_classes)
  
  def forward(self, inputs, labels=None):
    x = self.inputs_to_embeds(inputs)
    x = self.dropout(x)
    for enc in self.encoder:
      x = enc(x)
    pooled_x = self.pooler(x).squeeze(2)
    pooled_x = self.act(self.mlp_proj(pooled_x))
    pooled_x = self.dropout(pooled_x)
    logits = self.logits_proj(pooled_x)
    loss = None
    if labels is not None:
      loss_fct = nn.CrossEntropyLoss()
      loss = loss_fct(logits.view(logits.size(0), -1), labels.view(-1))
    return (loss, logits)

In [None]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = TransformerTimeSeries(dim=256, 
                              mlp_dim=128,
                              num_heads=4,
                              num_classes=2,
                              depth=2, 
                              dropout=0.1)
model = model.double()
model.to(device)

In [7]:
params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Trainable Parameters: {params}")

Trainable Parameters: 1118082


In [8]:
# Hyperparameter section
BS = 128
EPOCHS = 20
LR = 5e-4
early_stop = EarlyStopping(metric="val_accuracy", patience=10, verbose=True)

In [9]:
dataset_train = CustomDataset(train=True)
dataset_valid = CustomDataset(train=False)

train_loader = DataLoader(dataset_train, batch_size=BS, shuffle=False,
                          collate_fn=dataset_train.collate_fn)
valid_loader = DataLoader(dataset_valid, batch_size=BS, shuffle=False,
                          collate_fn=dataset_valid.collate_fn)

print(f"Length of Train Loader: {len(train_loader)}")
print(f"Length of Valid Loader: {len(valid_loader)}")

Length of Train Loader: 29
Length of Valid Loader: 11


In [10]:
# get_optimal_batchsize(dataset_train, model)

In [10]:
# Sanity check forward pass
loader = DataLoader(dataset_train, batch_size=4, shuffle=False,
                          collate_fn=dataset_train.collate_fn)
model.eval()
with torch.set_grad_enabled(False):
  for sample in loader:
    outputs = model(**dict_to_device(sample, device=device))
    loss, logits = outputs
    assert logits.size(-1) == 2, "Last Dimension not equal to classes"
    break

In [11]:
num_training_steps = len(train_loader) * EPOCHS
optimizer = torch.optim.Adam(model.parameters(), lr=LR)
scheduler = get_linear_schedule_with_warmup(optimizer, 
                                            num_warmup_steps=0,
                                            num_training_steps=num_training_steps)

In [12]:
def get_accuracy(model, data_loader, device):
  correct, total = 0, 0
  with torch.set_grad_enabled(False):
    for sample in data_loader:
      sample = dict_to_device(sample, device=device)
      inputs = sample["inputs"]
      labels = sample["labels"].view(-1)
      _, logits = model(inputs=inputs)
      probas = F.softmax(logits, dim=-1)
      _, preds = torch.max(probas, dim=-1)
      correct += (preds == labels).sum()
      total += labels.size(0)
  return (correct.float() / total * 100).item()

In [13]:
progress_bar = tqdm(range(num_training_steps))

for epoch in range(EPOCHS):
  loss_list = []
  model.train()
  for idx, sample in enumerate(train_loader):
    outputs = model(**dict_to_device(sample, device=device))
    loss, _ = outputs
    loss_list.append(loss.item())
    loss.backward()
    optimizer.step()
    scheduler.step()
    optimizer.zero_grad()
    progress_bar.update(1)
  model.eval()
  with torch.set_grad_enabled(False):
    val_accuracy = get_accuracy(model, valid_loader, device)
    early_stop(val_accuracy, model)
    if early_stop.early_stop:
      print("Early Stopping!")
      break

  0%|          | 0/580 [00:00<?, ?it/s]

Validation accuracy increased from -inf% to 60.98%
Validation accuracy increased from 60.98% to 71.21%
Validation accuracy increased from 71.21% to 74.17%
Validation accuracy increased from 74.17% to 76.29%
Validation accuracy increased from 76.29% to 77.42%
Validation accuracy increased from 77.42% to 78.56%
Validation accuracy increased from 78.56% to 78.86%
Validation accuracy increased from 78.86% to 79.47%
Validation accuracy increased from 79.47% to 80.00%
EarlyStopping counter: 1 out of 10
Validation accuracy increased from 80.00% to 80.91%
EarlyStopping counter: 1 out of 10
EarlyStopping counter: 2 out of 10
EarlyStopping counter: 3 out of 10
Validation accuracy increased from 80.91% to 81.06%
EarlyStopping counter: 1 out of 10
EarlyStopping counter: 2 out of 10
EarlyStopping counter: 3 out of 10
EarlyStopping counter: 4 out of 10
EarlyStopping counter: 5 out of 10
