<a href="https://colab.research.google.com/github/whoami-Lory271/thesis-project/blob/main/thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Symbol legend

* B: batch size 
* M: number of channels
* P: patch dimension
* N: number of patches
* L: lookback window


# Installations and imports


In [1]:
# Install the two third-party packages imported below, pinned to exact versions.
!pip install pytorch-lightning==2.0.1.post0 --quiet
!pip install einops==0.6.1 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.6/718.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m269.3/269.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.2/114.2 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 kB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.2/42.2 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import pandas as pd
import logging
from google.colab import drive
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
# https://theaisummer.com/einsum-attention/
import einops
import math
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

In [3]:
# Mount Google Drive at /content/drive so files stored there can be opened by path.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
# Notebook-wide logger; its level is DEBUG, so debug messages and above pass
# this logger's own level check.
log = logging.getLogger('model_application')
log.setLevel(logging.DEBUG)

# Optional console handler, currently disabled. If re-enabled, it must be
# attached to `log` (the logger created above).
# ch = logging.StreamHandler()
# ch.setLevel(logging.INFO)

# # create formatter
# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# # add formatter to ch
# ch.setFormatter(formatter)

# # add ch to logger
# log.addHandler(ch)

In [5]:
# Smoke test: emit one DEBUG and one INFO record through the notebook logger.
log.debug('debug message')
log.info('info message')
# Higher severities, left disabled:
# log.warning('warn message')
# log.error('error message')
# log.critical('critical message')

DEBUG:model_application:debug message
INFO:model_application:info message


# Constants

In [6]:
# Paths
# Identifier used as the key for the electricity dataset.
ELECTRICITY = "electricity"
# Project root folder, located under the Google Drive mount point (/content/drive).
ROOT_FOLDER = "/content/drive/MyDrive/Università/Magistrale/Tesi/code"

# Hyperparameters
BATCH_SIZE = 16

# Preprocessing

## Datasets

In [None]:
# Folder that holds each dataset's files, keyed by dataset identifier.
datasets_path = {ELECTRICITY: ROOT_FOLDER + "/datasets/electricity"}

# Raw source file of each dataset; the leading slash lets it be appended
# directly to the matching entry of `datasets_path`.
datasets_name = {ELECTRICITY: "/LD2011_2014.txt"}

# Pickled version of each dataset, appended to the folder path the same way.
datasets_processed_name = {ELECTRICITY: "/electricity.pkl"}

### Electricity

**Preprocessing**

In [None]:
# df = pd.read_csv(datasets_path[ELECTRICITY] + datasets_name[ELECTRICITY], sep = ';')
# df.rename(columns={df.columns[0]: 'Date'},inplace=True)
# df.to_pickle(datasets_path[ELECTRICITY] + datasets_processed_name[ELECTRICITY])

In [None]:
# Load the pickled electricity data (presumably the frame written by the
# commented-out preprocessing step above — confirm the file exists on Drive).
df = pd.read_pickle(datasets_path[ELECTRICITY] + datasets_processed_name[ELECTRICITY])

In [None]:
class ElectricityDataset(Dataset):
    """
    Map-style dataset that exposes one DataFrame row per sample.

    The first column of the frame is skipped (the preprocessing step in this
    notebook renames it to 'Date'); every remaining column is treated as one
    numeric value of the sample.
    """

    def __init__(self, data):
        """
        data -> pandas DataFrame, one row per sample; column 0 is not a feature,
                the other columns must be convertible to float32
        """
        super().__init__()
        self.data = data

    def __len__(self):
        """Number of samples, i.e. number of rows of the frame."""
        return self.data.shape[0]

    def __getitem__(self, idx):
        """
        idx -> integer row position
        output -> float32 tensor [M] holding the values of row `idx`
        """
        row = self.data.iloc[idx, 1:]
        # The default DataLoader collate function cannot batch pandas objects,
        # so the row is converted to a tensor before being returned.
        return torch.tensor(row.to_numpy(dtype="float32"))

class ElectricityDataModule(pl.LightningDataModule):
    """
    Lightning data module for the pickled electricity frame.

    The rows are split in their stored order into three contiguous parts:
    train (first `train_size` fraction), test (last `test_size` fraction) and
    validation (whatever lies in between). With the default fractions
    (0.6 + 0.4) no rows are left for validation; pass smaller fractions to
    obtain a non-empty validation split.
    """

    def __init__(self, path, batch_size, train_size = 0.6, test_size = 0.4):
        """
        path -> location of the pickled DataFrame
        batch_size -> batch size used by every dataloader
        train_size, test_size -> fractions in [0, 1] whose sum is at most 1

        Raises ValueError when the fractions are out of range.
        """
        super().__init__()
        fractions_ok = 0 <= train_size <= 1 and 0 <= test_size <= 1
        # small tolerance so that sums such as 0.7 + 0.3 are not rejected
        if not fractions_ok or train_size + test_size > 1 + 1e-9:
            raise ValueError(
                f"train_size and test_size must lie in [0, 1] and sum to at most 1, "
                f"got {train_size} and {test_size}"
            )

        self.path = path
        self.batch_size = batch_size

        # NOTE: unpickling runs arbitrary code; only load files from a trusted source.
        data = pd.read_pickle(path)

        n_rows = len(data)
        train_end = int(train_size * n_rows)
        # never let the test split reach back into the training rows
        test_start = max(train_end, n_rows - int(test_size * n_rows))

        self.train_data = data.iloc[:train_end]
        self.validate_data = data.iloc[train_end:test_start]
        self.test_data = data.iloc[test_start:]

    # def prepare_data(self):
    #     # download

    def setup(self, stage: str):
        """Build the datasets needed by `stage` (all of them when stage is None)."""
        # Assign train/val datasets for use in dataloaders
        if stage in ("fit", "validate", None):
            self.train = ElectricityDataset(self.train_data)
            self.validate = ElectricityDataset(self.validate_data)

        # Assign test dataset for use in dataloader(s)
        if stage in ("test", None):
            self.test = ElectricityDataset(self.test_data)

        # if stage == "predict":

    def train_dataloader(self):
        """Batches over the training split; the last incomplete batch is dropped."""
        return DataLoader(self.train, batch_size=self.batch_size, drop_last = True)

    def val_dataloader(self):
        """Batches over the validation split; the last incomplete batch is dropped."""
        return DataLoader(self.validate, batch_size=self.batch_size, drop_last = True)

    def test_dataloader(self):
        """Batches over the test split; the last incomplete batch is dropped."""
        return DataLoader(self.test, batch_size=self.batch_size, drop_last = True)

    # def predict_dataloader(self):
        


# Model

## PatchTST

In [7]:
# Scratch check of the patching recipe on a toy tensor of shape [4 x 20 x 2].
x = torch.randint(20, size = (4,20,2))
print(x.shape)
print("---------------------------------")
# Take the last step along dim 1 and repeat it twice, then append it:
# dim 1 grows from 20 to 22.
tail = x[:,-1:,:]
tail = torch.repeat_interleave(tail, 2, dim = 1)
x = torch.concatenate((x,tail), axis = 1)
print(x.shape)
# Sliding windows of size 5 with step 2 along dim 1; the window contents end up
# in a new trailing dimension (printed shape: [4, 9, 2, 5]).
x = x.unfold(dimension=1, size=5, step=2)
print(x.shape)

torch.Size([4, 20, 2])
---------------------------------
torch.Size([4, 22, 2])
torch.Size([4, 9, 2, 5])


In [8]:
# Compare two ways of computing the frequency term for a model dimension of 16.
log.debug("my term")
# 10000^(k/16) for k = 0, 2, ..., 14: values grow from 1 upwards.
div_term = torch.pow(10000.0, torch.arange(0, 16, 2) / 16) 
print(div_term)
log.debug("other term")
# exp(-k * ln(10000) / 16) = 10000^(-k/16): the reciprocal of the term above,
# as the printed values confirm.
div_term = torch.exp(torch.arange(0, 16, 2) * -(math.log(10000.0) / 16))
print(div_term)

DEBUG:model_application:my term
DEBUG:model_application:other term


tensor([1.0000e+00, 3.1623e+00, 1.0000e+01, 3.1623e+01, 1.0000e+02, 3.1623e+02,
        1.0000e+03, 3.1623e+03])
tensor([1.0000e+00, 3.1623e-01, 1.0000e-01, 3.1623e-02, 1.0000e-02, 3.1623e-03,
        1.0000e-03, 3.1623e-04])


In [29]:
# Shape check for batched matrix products on two [32 x 16 x 7 x 8] tensors.
x = torch.randint(20, size = (32,16,7,8), dtype=torch.float32)
y = torch.randint(20, size = (32,16,7,8), dtype=torch.float32)
# (7 x 8) @ (8 x 7) over the two leading dimensions -> [32 x 16 x 7 x 7]
score = x @ y.transpose(2,3)
# (7 x 7) @ (7 x 8) -> back to [32 x 16 x 7 x 8], as logged
log.debug((score @ x).shape)

DEBUG:model_application:torch.Size([32, 16, 7, 8])


In [48]:
# Utility functions

def create_patches(xb, patch_len, stride):
    """
    Cut a batch of series into (possibly overlapping) patches along the time axis.

    The series is first extended with `stride` copies of its last time step,
    then windows of `patch_len` steps are taken every `stride` steps.

    xb -> [B x L x M]
    output -> [B x N x M x P], N

    Raises ValueError when the padded series is shorter than one patch.
    """
    _, seq_len, _ = xb.shape

    # we repeat the last time step of the sequence to have equal patches
    # (taken from the argument itself, not from any notebook-level variable)
    tail = torch.repeat_interleave(xb[:, -1:, :], stride, dim = 1)
    xb = torch.cat((xb, tail), dim = 1)

    padded_len = seq_len + stride
    if padded_len < patch_len:
        raise ValueError(
            f"sequence of length {seq_len} padded by {stride} is shorter than patch_len {patch_len}"
        )

    # compute number of patches; equals (L - P) // stride + 2 whenever L >= P
    patch_num = (padded_len - patch_len) // stride + 1

    # create patches: the window contents land in a new trailing dimension
    xb = xb.unfold(dimension=1, size=patch_len, step=stride)

    assert patch_num == xb.shape[1], f"wrong number of computed patches, expected {patch_num} but computed {xb.shape[1]}"

    return xb, patch_num

"""
ref: https://kazemnejad.com/blog/transformer_architecture_positional_encoding/
"""

def positional_encoding(batch_size, patch_num, d_model):
    """
    Fixed sinusoidal positional encoding, replicated along the batch axis.

        PE[pos, 2i]   = sin(pos / 10000^(2i / d_model))
        PE[pos, 2i+1] = cos(pos / 10000^(2i / d_model))

    batch_size -> number of identical copies stacked on the first axis
    patch_num -> number of positions N
    d_model -> encoding width D (odd values are supported)
    output -> [B x N x D], wrapped in a Parameter with requires_grad=False
    """
    pe = torch.zeros(batch_size, patch_num, d_model)
    # create a positional array, one row per position
    position = torch.arange(0, patch_num, dtype=torch.float32).unsqueeze(1)
    # div term for half of positions: 10000^(2i / d_model)
    div_term = torch.pow(10000.0, torch.arange(0, d_model, 2, dtype=torch.float32) / d_model)
    # the position is DIVIDED by the term, so wavelengths grow with the column index
    angles = position / div_term
    # even positions
    pe[:, :, 0::2] = torch.sin(angles)
    # odd positions; with an odd d_model there is one odd column fewer than even ones
    pe[:, :, 1::2] = torch.cos(angles[:, : d_model // 2])

    # if normalize:
    #     pe = pe - pe.mean()
    #     pe = pe / (pe.std() * 10)

    return nn.parameter.Parameter(pe, requires_grad= False)

In [49]:
#PatchTST

class PatchTSTEncoder(nn.Module):
    """
    Channel-independent patch encoder.

    Each channel of the input series is instance-normalized, cut into patches,
    linearly embedded, summed with a fixed positional encoding and passed
    through a stack of transformer encoder layers.
    """

    def __init__(self, num_channels, patch_num, patch_len, stride, batch_size = 16, d_model = 128, n_layers = 3, n_heads = 16, dropout = 0.2):
        """
        num_channels -> M, number of channels of the input series
        patch_num -> N, number of patches produced for one series
        patch_len -> P, length of one patch
        stride -> step between the starts of two consecutive patches
        batch_size -> B, only used to size the stored positional encoding
        d_model -> D, width of the latent space
        n_layers -> number of stacked encoder layers
        n_heads -> attention heads of every encoder layer
        dropout -> dropout probability (embedding and every encoder layer)
        """
        super(PatchTSTEncoder, self).__init__()
        self.num_channels = num_channels
        self.patch_num = patch_num
        self.patch_len = patch_len
        self.stride = stride
        self.d_model = d_model
        self.n_layers = n_layers
        self.n_heads = n_heads

        # instance normalization
        # ref: https://wandb.ai/wandb_fc/Normalization-Series/reports/Instance-Normalization-in-PyTorch-With-Examples---VmlldzoxNDIyNTQx
        self.inst_norm = nn.InstanceNorm1d(num_channels)

        # patch creation
        self.create_patch = create_patches

        # embedding
        self.W_p = nn.Linear(patch_len, d_model, bias = False)

        # positional encoding (non-trainable, identical for every batch entry)
        self.W_pos = positional_encoding(batch_size * num_channels, patch_num, d_model)

        # dropout applied to the embedded patches
        self.dropout = nn.Dropout(dropout)

        # encoder: the head count and dropout chosen here are forwarded to every layer
        self.encoders = nn.ModuleList([VanillaTransformerEncoder(d_model, n_heads, dropout) for _ in range(n_layers)])

    def forward(self, x):
        """
        x -> [B x L x M]
        output -> [(B M) x N x D]
        """
        # instance normalization expects [B x M x L], so swap the last two
        # dimensions before applying it and swap them back afterwards
        x = einops.rearrange(self.inst_norm(einops.rearrange(x, 'b l m -> b m l')), 'b m l -> b l m')

        # create patches
        x, _ = self.create_patch(x, self.patch_len, self.stride)

        # reshape the tensor from [B x N x M x P] -> [(B M) x N x P]
        x = einops.rearrange(x, 'b n m p -> (b m) n p')
        # now it can be provided to our transformer implementation

        # project into transformer latent space; every batch entry of W_pos is
        # the same, so a single one is broadcast and any batch size is accepted
        x = self.dropout(self.W_p(x) + self.W_pos[:1])

        for layer in self.encoders:
            x = layer(x)

        return x



In [50]:
# VanillaTransformer encoder
class VanillaTransformerEncoder(nn.Module):
    """
    One transformer encoder layer.

    Two sub-layers (multi-head self-attention, then a position-wise
    feed-forward network), each followed by dropout, a residual sum with the
    sub-layer input and a layer normalization.
    """

    def __init__(self, d_model, n_heads = 16, dropout = 0.2):
        super().__init__()

        # attention sub-layer
        self.mha = MultiHeadAttention(d_model, n_heads)
        self.norm1 = nn.LayerNorm(d_model) # maybe batch normalization
        self.dropout1 = nn.Dropout(dropout)

        # feed-forward sub-layer
        self.pffn = PositionWiseFeedForwardNetwork(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, x):
        """Apply both sub-layers to `x`; the output has the same shape as `x`."""
        # self-attention: queries, keys and values are all the layer input
        attended = self.norm1(self.dropout1(self.mha(x, x, x)) + x)

        # feed-forward, with the attention output as the residual branch
        return self.norm2(self.dropout2(self.pffn(attended)) + attended)

"""
ref: https://d2l.ai/chapter_attention-mechanisms-and-transformers/multihead-attention.html
"""
class MultiHeadAttention(nn.Module):
    """
    Multi-head scaled dot-product attention with bias-free linear projections.

    d_model is split evenly across n_heads, so every head works on
    d_k = d_model // n_heads features.
    """

    def __init__(self, d_model, n_heads = 16):
        super(MultiHeadAttention, self).__init__()

        assert d_model % n_heads == 0, "d_model must be a multiple of n_heads"

        self.n_heads = n_heads
        self.d_k = d_model // n_heads

        self.W_q = nn.Linear(d_model, d_model, bias = False)
        self.W_k = nn.Linear(d_model, d_model, bias = False)
        self.W_v = nn.Linear(d_model, d_model, bias = False)
        self.W_o = nn.Linear(d_model, d_model, bias = False)

    # reshape to compute in parallel the several heads
    def reshape_vector(self, x, inverse = False):
        """
        Split the feature axis into heads (inverse = False) or merge the heads
        back into one feature axis (inverse = True).

        x: [B x N x D] || [B x H x N x DIM]
        output: [B x H x N x DIM] || [B x N x D]
        """
        if not inverse:
            return einops.rearrange(x, 'b n (dim h) -> b h n dim', h=self.n_heads)

        # same (dim h) grouping as above, so this exactly undoes the split
        return einops.rearrange(x, 'b h n dim -> b n (dim h)')

    # ref: https://machinelearningmastery.com/the-transformer-attention-mechanism/
    def scaled_attention(self, q, k, v, dk):
        """
        softmax(q k^T / sqrt(dk)) v, computed independently for every head.

        q: [B x H x N x DIM], k: [B x H x N x DIM] , v: [B x H x N x DIM]
        output: [B x H x N x DIM]
        """
        sqrt_d_k = math.sqrt(dk)

        # using einsum to perform batch matrix multiplication
        score = einops.einsum(q, k, 'b h n d_k, b h n_1 d_k -> b h n n_1') / sqrt_d_k

        # normalize over the key axis: every query row sums to one
        weights = F.softmax(score, dim = -1)

        res = einops.einsum(weights, v, 'b h n n_1, b h n_1 d_k -> b h n d_k')

        return res

    def forward(self, q, k, v):
        """
        q, k, v: [B x N x D]
        output: [B x N x D]
        """
        q = self.reshape_vector(self.W_q(q))
        k = self.reshape_vector(self.W_k(k))
        v = self.reshape_vector(self.W_v(v))

        # parallel computation
        out = self.scaled_attention(q, k, v, self.d_k)
        out_concat = self.reshape_vector(out, inverse = True)

        return self.W_o(out_concat)

class PositionWiseFeedForwardNetwork(nn.Module):
    """
    Two-layer feed-forward network applied on the last dimension:
    Linear(d_model -> d_inner), GELU, Linear(d_inner -> d_model).
    """

    def __init__(self, d_model, d_inner = 256):
        super().__init__()
        # expansion, non-linearity, projection back to the model width
        self.W_1 = nn.Linear(d_model, d_inner)
        self.act = nn.GELU()
        self.W_2 = nn.Linear(d_inner, d_model)

    def forward(self, x):
        """Map `x` of shape [... x d_model] to a tensor of the same shape."""
        hidden = self.act(self.W_1(x))
        return self.W_2(hidden)

In [52]:
# End-to-end shape check of the encoder on random data.
B = 16  # batch size
L = 20  # lookback window
M = 3   # number of channels
P = 5   # patch length
S = 3   # stride between patches
# number of patches, same formula as the one used when creating the patches
N = (L - P)  // S + 2

x = torch.randint(20, size = (B,L,M), dtype=torch.float32)
encoder = PatchTSTEncoder(M, N, P, S)
x = encoder(x)
# logged shape is [(B M) x N x D] = [48, 7, 128] with the default d_model
log.debug(x.shape)

DEBUG:model_application:torch.Size([48, 7, 128])
