<a href="https://colab.research.google.com/github/whoami-Lory271/thesis-project/blob/main/thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Symbol legend

* B: batch size
* L: sequence length (number of time steps)
* M: number of channels
* P: patch dimension (length of one patch)
* N: number of patches


# Installations and imports


In [2]:
!pip install pytorch-lightning==2.0.1.post0 --quiet

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m718.6/718.6 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.2/519.2 kB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m50.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m264.6/264.6 kB[0m [31m20.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m158.8/158.8 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.2/114.2 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [29]:
import pandas as pd
import logging
from google.colab import drive
import torch
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import numpy as np
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl

In [None]:
# Mount Google Drive at /content/drive; ROOT_FOLDER (see Constants) lives under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Create the notebook-wide logger and let it pass every record from DEBUG upwards.
log = logging.getLogger('model_application')
log.setLevel(logging.DEBUG)

# Optional dedicated console handler, currently disabled. The lines below
# attach it to `log`, the logger created above, once uncommented.

# # create console handler and set its level to INFO
# ch = logging.StreamHandler()
# ch.setLevel(logging.INFO)

# # create formatter
# formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

# # add formatter to ch
# ch.setFormatter(formatter)

# # add ch to the logger
# log.addHandler(ch)

In [None]:
# Smoke test: emit one DEBUG and one INFO record through `log`.
log.debug('debug message')
log.info('info message')
# Further levels, disabled:
# log.warning('warn message')
# log.error('error message')
# log.critical('critical message')

DEBUG:model_application:debug message
INFO:model_application:info message


# Constants

In [None]:
# Paths
# Key identifying the electricity dataset in the path/name dictionaries.
ELECTRICITY = "electricity"
# Project root on the mounted Google Drive.
# NOTE(review): absolute, user-specific path; adjust before running elsewhere.
ROOT_FOLDER = "/content/drive/MyDrive/Università/Magistrale/Tesi/code"

# Hyperparameters
BATCH_SIZE = 16

# Preprocessing

## Datasets

In [None]:
# Folder holding each dataset, keyed by dataset identifier.
datasets_path = {ELECTRICITY: ROOT_FOLDER + "/datasets/electricity"}

# Raw file name of each dataset (leading "/" so it can be appended to the folder).
datasets_name = {ELECTRICITY: "/LD2011_2014.txt"}

# File name of the preprocessed (pickled) version of each dataset.
datasets_processed_name = {ELECTRICITY: "/electricity.pkl"}

### Electricity

**Preprocessing**

In [None]:
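# One-off conversion of the raw text file into the pickle loaded in the next cell
# (left commented out, presumably because the pickle has already been written).
# NOTE(review): if the raw file uses ',' as the decimal mark, read_csv also needs decimal=',' — confirm.
# df = pd.read_csv(datasets_path[ELECTRICITY] + datasets_name[ELECTRICITY], sep = ';')
# df.rename(columns={df.columns[0]: 'Date'},inplace=True)
# df.to_pickle(datasets_path[ELECTRICITY] + datasets_processed_name[ELECTRICITY])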

In [None]:
# Load the preprocessed electricity data from its pickle on Drive.
# NOTE(review): unpickling can execute arbitrary code; only load files you created yourself.
df = pd.read_pickle(datasets_path[ELECTRICITY] + datasets_processed_name[ELECTRICITY])

In [None]:
class ElectricityDataset(Dataset):
    """Map-style dataset exposing one row of a pandas table per item.

    Item ``idx`` is row ``idx`` of ``data`` with its first column dropped
    (presumably the ``Date`` column — TODO confirm against the pickle).
    """

    def __init__(self, data):
        super().__init__()
        # Stored untouched; rows are sliced lazily in __getitem__.
        self.data = data

    def __len__(self):
        # One sample per row of the table.
        n_rows, *_ = self.data.shape
        return n_rows

    def __getitem__(self, idx):
        # Positional lookup: row `idx`, every column except the first.
        # NOTE(review): the row is returned as a pandas Series; confirm that the
        # DataLoader collate function in use accepts it, or convert to a tensor.
        return self.data.iloc[idx, 1:]

class ElectricityDataModule(pl.LightningDataModule):
    """Serve contiguous train/validation/test splits of the pickled electricity data.

    The pickle at ``path`` is read once in ``__init__`` and cut, in row order,
    into three non-overlapping blocks: the first ``train_size`` fraction of the
    rows for training, the last ``test_size`` fraction for testing, and
    whatever lies in between for validation.

    Args:
        path: location of the pickled pandas object (one row per sample).
        batch_size: number of samples per batch for all three dataloaders.
        train_size: fraction of the rows (0..1) assigned to the training split.
        test_size: fraction of the rows (0..1) assigned to the test split.

    Raises:
        ValueError: if a fraction is negative or the two fractions sum to more than 1.

    Note:
        With the defaults (0.6 + 0.4) no rows are left for validation; lower
        one of the fractions to obtain a non-empty validation split.
    """

    def __init__(self, path, batch_size, train_size = 0.6, test_size = 0.4):
        super().__init__()
        # Small tolerance so that fractions such as 0.6 + 0.4 are not rejected
        # because of floating-point rounding.
        if train_size < 0 or test_size < 0 or train_size + test_size > 1 + 1e-9:
            raise ValueError(
                "train_size and test_size must be non-negative and sum to at most 1, "
                f"got train_size={train_size}, test_size={test_size}"
            )
        self.path = path
        # Read by every *_dataloader method below.
        self.batch_size = batch_size

        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        data = pd.read_pickle(path)
        n_rows = len(data)
        train_end = int(train_size * n_rows)
        # Clamp so the test block can never reach back into the training block.
        test_start = max(train_end, n_rows - int(test_size * n_rows))

        self.train_data = data.iloc[:train_end]
        self.validate_data = data.iloc[train_end:test_start]
        self.test_data = data.iloc[test_start:]

    # prepare_data (download step) is not needed yet: the pickle already exists.

    def setup(self, stage: str):
        """Build the datasets required by the given Lightning ``stage``."""
        # Training data is only needed when fitting.
        if stage == "fit":
            self.train = ElectricityDataset(self.train_data)

        # Validation runs both inside fit and on a stand-alone validate call.
        if stage in ("fit", "validate"):
            self.validate = ElectricityDataset(self.validate_data)

        # Assign test dataset for use in dataloader(s)
        if stage == "test":
            self.test = ElectricityDataset(self.test_data)

        # The "predict" stage is not supported yet.

    def train_dataloader(self):
        """Batches of the training split (incomplete last batch dropped)."""
        return DataLoader(self.train, batch_size=self.batch_size, drop_last = True)

    def val_dataloader(self):
        """Batches of the validation split (incomplete last batch dropped)."""
        return DataLoader(self.validate, batch_size=self.batch_size, drop_last = True)

    def test_dataloader(self):
        """Batches of the test split (incomplete last batch dropped)."""
        return DataLoader(self.test, batch_size=self.batch_size, drop_last = True)

    # predict_dataloader is not implemented yet.
        


# Model

## PatchTST

In [48]:
# Scratch check of the patching recipe on a random [B=4, L=20, M=2] batch.
x = torch.randint(20, size=(4, 20, 2))
print(x.shape)
print("---------------------------------")

# Extend the time axis with two copies of the last time step.
tail = x[:, -1:, :].repeat_interleave(2, dim=1)
x = torch.cat([x, tail], dim=1)
print(x.shape)

# Slide a window of 5 steps with stride 2 along the time axis.
x = x.unfold(1, 5, 2)
print(x.shape)

torch.Size([4, 20, 2])
---------------------------------
torch.Size([4, 22, 2])
torch.Size([4, 9, 2, 5])


In [None]:
# Utility functions

def createPatches(xb, patch_len, stride):
    """Cut a batch of multivariate series into overlapping patches along time.

    The series is first extended at its end with ``stride`` copies of the last
    time step, so the final observations are covered by a patch; a window of
    ``patch_len`` steps then slides over the padded series with step ``stride``.

    Args:
        xb: tensor of shape [B x L x M] (batch, sequence length, channels).
        patch_len: number of time steps in one patch (P).
        stride: distance between the starts of consecutive patches.

    Returns:
        ``(patches, patch_num)`` where ``patches`` has shape [B x N x M x P]
        and ``patch_num`` is N. For L >= P, N equals (L - P) // stride + 2.

    Raises:
        RuntimeError: raised by ``Tensor.unfold`` when ``patch_len`` exceeds
            the padded length L + stride.
    """
    # Pad with copies of the last time step of the batch that was passed in
    # (not of some unrelated global tensor).
    tail = torch.repeat_interleave(xb[:, -1:, :], stride, dim=1)
    padded = torch.cat((xb, tail), dim=1)

    # unfold appends the window as the last dimension: [B, N, M, P].
    patches = padded.unfold(dimension=1, size=patch_len, step=stride)

    # Read N off the result so it always agrees with the returned tensor.
    patch_num = patches.shape[1]

    return patches, patch_num


In [None]:
#Model

class PatchTSTEncoder(nn.Module):
    """Encoder module for PatchTST (placeholder: no layers are defined yet)."""

    def __init__(self):
        # `self` is required here; without it, instantiating the class raises TypeError.
        super().__init__()

