In [1]:
import torch # to create tensors to store the raw data, weights and biases 
import torch.nn as nn # to make the weights and biases learnable (part of the network)
import torch.nn.functional as F # to apply activation functions 
from torch.optim import Adam  

import lightning as L # to train the model 
from torch.utils.data import DataLoader, TensorDataset # to load the data 
from lightning.pytorch.tuner.tuning import Tuner      
from lightning.pytorch.callbacks import ModelCheckpoint # By default Lightning's ModelCheckpoint uses save_top_k=1, so it tries to delete the previous checkpoint whenever it saves a new one.
                                                        # On Windows a file is locked as soon as any other program (Explorer preview, antivirus, TensorBoard, VS Code, ...) opens it, so Windows blocks the delete call => PermissionError [WinError 32].

import pandas as pd # to load the data 
import matplotlib.pyplot as plt # graphs 
import seaborn as sns # graphs

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Vocabulary: each token the model knows, mapped to its integer id.
token_to_id = {"what": 0, "is": 1, "statquest": 2, "<EOS>": 3, "awesome": 4}

# Reverse lookup (id -> token), used to turn predicted ids back into text.
id_to_token = {token_id: token for token, token_id in token_to_id.items()}

# Training phrases: a prompt, the <EOS> separator, then the expected answer.
input_phrases = [
    ["what", "is", "statquest", "<EOS>", "awesome"],
    ["statquest", "is", "what", "<EOS>", "awesome"],
]

# Targets: each input phrase shifted left by one token, terminated by <EOS>,
# so position i of a label row holds the token that follows input position i.
label_phrases = [
    ["is", "statquest", "<EOS>", "awesome", "<EOS>"],
    ["is", "what", "<EOS>", "awesome", "<EOS>"],
]

# Encode the phrases as integer tensors, one row per phrase.
inputs = torch.tensor(
    [[token_to_id[token] for token in phrase] for phrase in input_phrases]
)
labels = torch.tensor(
    [[token_to_id[token] for token in phrase] for phrase in label_phrases]
)

# Pair each input row with its label row and wrap the pairs in a DataLoader
# (default DataLoader settings).
dataset = TensorDataset(inputs, labels)
dataloader = DataLoader(dataset)


In [None]:
class PositionEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to word embeddings.

    The encoding table is computed once in ``__init__`` and stored as the
    buffer ``pe`` with shape ``(max_len, d_model)``:

        pe[pos, 2i]     = sin(pos / 10000 ** (2i / d_model))
        pe[pos, 2i + 1] = cos(pos / 10000 ** (2i / d_model))

    Args:
        d_model: Dimension of the word embeddings (even or odd).
        max_len: Number of positions in the table, i.e. the longest sequence
            (inputs plus outputs) that can be encoded.
    """

    def __init__(self, d_model=2, max_len=6):
        super().__init__()

        # Column vector of positions 0 .. max_len - 1, shape (max_len, 1).
        position = torch.arange(start=0, end=max_len, step=1).float().unsqueeze(1)

        # Even embedding indices 0, 2, 4, ... -- the "2i" in the formula.
        embedding_index = torch.arange(start=0, end=d_model, step=2).float()

        # One frequency per sin/cos pair, shape (ceil(d_model / 2),).
        div_term = 1 / torch.tensor(10000.0) ** (embedding_index / d_model)

        # Broadcast to one angle per (position, frequency) pair.
        angles = position * div_term

        pe = torch.zeros(max_len, d_model)  # position encoding matrix
        pe[:, 0::2] = torch.sin(angles)  # even columns get the sine terms
        # Odd columns get the cosine terms. When d_model is odd there is one
        # fewer odd column than even columns, so drop the unpaired frequency
        # instead of failing with a shape mismatch.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # A buffer (not a parameter): it is not trained, but it moves with the
        # module across devices and is saved in the state dict.
        self.register_buffer('pe', pe)

    def forward(self, word_embeddings):
        """Return ``word_embeddings`` with position encodings added.

        Args:
            word_embeddings: Tensor of shape ``(seq_len, d_model)`` or, with
                leading batch dimensions, ``(..., seq_len, d_model)``.
                ``seq_len`` must not exceed ``max_len``.

        Returns:
            Tensor with the same shape as ``word_embeddings``.
        """
        # Take the sequence length from the second-to-last dimension so that
        # batched inputs are handled too; for 2-D input this equals size(0).
        seq_len = word_embeddings.size(-2)
        return word_embeddings + self.pe[:seq_len, :]