## Import the Libraries

In [1]:
import pandas
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from torchvision.transforms import Lambda, ToTensor

## Write the Datasets and Dataloaders

In [2]:
# Create a Custom Dataset from train_data.csv and eval_data.csv
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pandas.read_csv(csv_file)
        self.transform = transform
        # Remove the first row from the Dataframe
        self.data = self.data.iloc[1:]

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist() # Convert the tensor to a list
        # Get the data from the Dataframe
        data = self.data.iloc[idx]
        # Convert the data to a numpy array
        data = data[0] # First column of the Dataframe
        data_num = []
        for i in data:
            data_num.append(ord(i)- 96)
        data = np.array(data_num)
        # Convert the data to a tensor
        data = torch.from_numpy(data)
        label = self.data.iloc[idx]
        label = label[1]
        label_num = []
        for i in label:
            label_num.append(ord(i)- 96)
        label = np.array(label_num)
        if self.transform:
            data = self.transform(data) # Apply the transform on the data
        return data, label

# Write a DataLoader for the Custom Dataset
train_dataset = CustomDataset('./A3 files/train_data.csv', transform=None)

# Split the Dataset into Train and Validation Datasets
train_dataset, val_dataset = train_test_split(train_dataset, test_size=0.2)

# Create a DataLoader for the Train and Validation Datasets
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=True)

############################# TESTING CODE #############################

# Print out some data from the Train Dataset
for i, data in enumerate(train_dataloader):
    print(data)
    break

[tensor([[ 4,  8, 21, 11, 20, 20,  6, 18]]), tensor([[ 1, 16,  7, 25,  4, 17, 15, 15]])]


## Write the Transformer Class

In [4]:
import math

class PositionalEncoding(nn.Module):
    # Injecting Some Information about the Relative or the Absolute Positioning of the tokens in the sequence
    def __init__(self, d_model, dropout = 0.1, max_len = 20):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p = dropout)
        position = []
        pos_enc = torch.zeros(max_len, d_model) # Positional Encoding -> Max Length and the dimensions of the model
        for i in range(max_len):
            position.append(i)
        position = torch.tensor(position).unsqueeze(1) # Got the pos -> value
        # Now, you want to make the position term to be max_len, d_model
        position_stacked = [position] * d_model 
        position = torch.cat(position_stacked, dim=1)
        # Now to obtain the 10000^2i/d_model 
        div_term = torch.arange(0, d_model, 2) # The Value to be divided
        div_term = div_term/d_model
        div_term = div_term.type(torch.float64)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))   
        pos_enc[:, 0::2] = torch.sin(position[:, 0::2]*div_term)
        pos_enc[:, 1::2] = torch.cos(position[:, 1::2]*div_term)
        self.pos_enc = pos_enc
        return
    
    def forward(self, x):
        x = x + self.pos_enc # This is placeholder -> Modify to include only the len given -> Not Max Len
        return self.dropout(x)

# Write the Transformer Model
class TransformerModel(nn.Transformer):
    # Write the __init__() function
    def __init__(self, n_inputs, n_tokens, n_heads, n_hidden, n_layers, dropout = 0.5):
        super(TransformerModel, self).__init__(d_model=n_inputs, nhead=n_heads, dim_feedforward=n_hidden, num_encoder_layers=n_layers)
        self.src_mask = None
        self.pos_encoding = PositionalEncoding(n_inputs, dropout) # Positionally Encode the Tokens
        self.input_embedding = nn.Embedding(n_tokens, n_inputs) # N_Tokens = Vocabulary -> Gives output as n_inputs
        self.n_inputs = n_inputs
        self.decoder = nn.Linear(n_inputs, n_tokens) # This isn't the Transformer Decoder -> Basically Vocabulary to n_inputs

        self.init_weights()

    def init_weights(self):
        init_range = 0.1 # Given the layers we can access their weights
        nn.init.uniform_(self.input_embedding.weight, -init_range, init_range)
        nn.init.zeros_(self.decoder.bias)
        nn.init.uniform_(self.decoder.weight, -init_range, init_range)

    def _generate_mask(self, sz):
        mask = torch.triu(torch.ones(sz, sz)).transpose(0, 1)
        # Wherever 0 is present fill in -inf's
        mask = torch.where(mask == 0, torch.tensor(-float('inf')), 0) # Condition, Replacement, if False Keep What
        return mask
    
    def forward(self, src, has_mask = True):
        src = self.input_embedding(src)*math.sqrt(self.n_inputs)
        src = self.pos_encoding(src)
        output = self.encoder(src, mask = self.src_mask)
        output = self.decoder(output)
        return F.log_softmax(output, dim=-1)
    

Model = TransformerModel(512, 8, 8, 5, 2, 0.1)

In [5]:
# Write the Training Loop
def train(model, train_dataloader, val_dataloader, epochs, optimizer, criterion):
    model.train()
    for epoch in range(epochs):
        for i, data in enumerate(train_dataloader):
            src = data[0]
            trg = data[1]
            optimizer.zero_grad()
            output = model(src)
            loss = criterion(output, trg)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                print("Epoch: {} Iteration: {} Loss: {}".format(epoch, i, loss.item()))
    return model

# Write the Evaluation Loop
def evaluate(model, val_dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for i, data in enumerate(val_dataloader):
            src = data[0]
            trg = data[1]
            output = model(src)
            loss = criterion(output, trg)
            total_loss += loss.item()
    return total_loss/len(val_dataloader)

epochs = 10
lr = 0.001

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(Model.parameters(), lr = lr)

Model = train(Model, train_dataloader, val_dataloader, epochs, optimizer, criterion)

IndexError: index out of range in self