## Import the Libraries

In [11]:
import pandas
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from torchvision.transforms import Lambda, ToTensor

## Write the Datasets and Dataloaders

In [12]:
# Create a Custom Dataset from train_data.csv and eval_data.csv
class CustomDataset(Dataset):
    def __init__(self, csv_file, transform=None):
        self.data = pandas.read_csv(csv_file)
        self.transform = transform
        # Remove the first row from the Dataframe
        self.data = self.data.iloc[1:]

    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        if torch.is_tensor(idx):
            idx = idx.tolist() # Convert the tensor to a list
        # Get the data from the Dataframe
        data = self.data.iloc[idx]
        # Convert the data to a numpy array
        data = data[0] # First column of the Dataframe
        data_num = []
        for i in data:
            data_num.append(ord(i)- 96)
        data = np.array(data_num)
        # Convert the data to a tensor
        data = torch.from_numpy(data)
        label = self.data.iloc[idx]
        label = label[1]
        label_num = []
        for i in label:
            label_num.append(ord(i)- 96)
        # add 0 at pos 0 to label
        label_num.insert(0, 0)
        label = np.array(label_num)
        if self.transform:
            data = self.transform(data) # Apply the transform on the data
        return data, label

# Write a DataLoader for the Custom Dataset
train_dataset = CustomDataset('./A3 files/train_data.csv', transform=None)

# Split the Dataset into Train and Validation Datasets
train_dataset, val_dataset = train_test_split(train_dataset, test_size=0.2)

# Create a DataLoader for the Train and Validation Datasets
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=1, shuffle=True)

############################# TESTING CODE #############################

# Print out some data from the Train Dataset
for i, data in enumerate(train_dataloader):
    print(data)
    break

[tensor([[17, 14, 26, 25, 15, 18, 15,  9]]), tensor([[ 0, 26, 26,  6, 19, 19,  5, 11, 17]])]


In [13]:
print(train_dataset[0])

(tensor([14, 17, 16, 23, 18, 24, 12,  3]), array([ 0,  6, 14,  7,  4,  6, 17, 22, 12]))


## Write the Transformer Class

In [14]:
import math

class PositionalEncoding(nn.Module):
    # Injecting Some Information about the Relative or the Absolute Positioning of the tokens in the sequence
    def __init__(self, d_model, dropout = 0.1, max_len ):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p = dropout)
        position = []
        pos_enc = torch.zeros(max_len, d_model) # Positional Encoding -> Max Length and the dimensions of the model
        for i in range(max_len):
            position.append(i)
        position = torch.tensor(position).unsqueeze(1) # Got the pos -> value
        # Now, you want to make the position term to be max_len, d_model
        position_stacked = [position] * d_model 
        position = torch.cat(position_stacked, dim=1)
        # Now to obtain the 10000^2i/d_model 
        div_term = torch.arange(0, d_model, 2) # The Value to be divided
        div_term = div_term/d_model
        div_term = div_term.type(torch.float64)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))   
        pos_enc[:, 0::2] = torch.sin(position[:, 0::2]*div_term)
        pos_enc[:, 1::2] = torch.cos(position[:, 1::2]*div_term)
        self.pos_enc = pos_enc
        return
    
    def forward(self, x):
        x = x + self.pos_enc # This is placeholder -> Modify to include only the len given -> Not Max Len
        #print("Positional Encoding Completed")
        return self.dropout(x)

# Create a Transformer Encoder
class TransformerEncoder(nn.Module):
    def __init__(self, d_model, nhead, num_layers, dim_feedforward, dropout=0.1, activation="relu"):
        super(TransformerEncoder, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout
        self.activation = activation
        self.pos_enc = PositionalEncoding(d_model, dropout, max_len = 8)
        self.transformer_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, activation), num_layers)
        return
    
    def forward(self, x):
        x = self.pos_enc(x)
        x = self.transformer_encoder(x)
        return x

# Create a Transformer Decoder
class TransformerDecoder(nn.Module):
    def __init__(self, d_model, nhead, num_layers, dim_feedforward, dropout=0.1, activation="relu"):
        super(TransformerDecoder, self).__init__()
        self.d_model = d_model
        self.nhead = nhead
        self.num_layers = num_layers
        self.dim_feedforward = dim_feedforward
        self.dropout = dropout
        self.activation = activation
        self.pos_enc = PositionalEncoding(d_model, dropout, max_len = 9)
        self.transformer_decoder = nn.TransformerDecoder(nn.TransformerDecoderLayer(d_model, nhead, dim_feedforward, dropout, activation), num_layers)
        self.linear = nn.Linear(d_model, 27)
        return
    
    def forward(self, target, memory):
        target = self.pos_enc(target)
        target_mask = torch.triu(torch.ones(9, 9), diagonal=1)
        # reshape target and memory
        target = target.permute(1, 0, 2)
        memory = memory.permute(1, 0, 2)
        #print(target.shape, memory.shape, target_mask.shape, "------>")
        x = self.transformer_decoder(target, memory)
        x = self.linear(x)
        return x

# Create a Transformer Model
class Transformer(nn.Module):
    def __init__(self, d_model, nhead, num_layers, dim_feedforward, dropout=0.1, activation="relu"):
        super(Transformer, self).__init__()
        self.embedding = nn.Embedding(27, d_model)
        self.embed_outputs = nn.Embedding(27, d_model)
        self.encoder = TransformerEncoder(d_model, nhead, num_layers, dim_feedforward, dropout, activation)
        self.decoder = TransformerDecoder(d_model, nhead, num_layers, dim_feedforward, dropout, activation)
        return
    
    def forward(self, x, y):
        x = self.embedding(x) # X = batch_size, seq_len, d_model
        x = self.encoder(x) 
        y = self.embed_outputs(y)
        y = self.decoder(y, x)
        return y

### Train the Model

In [60]:
# Create a Transformer Model
Model = Transformer(300, 2, 2, 300, 0.1, "relu")

# Create a Loss Function
criterion = nn.CrossEntropyLoss()

# Create an Optimizer
optimizer = torch.optim.Adam(Model.parameters(), lr=0.0001)

# Create a Training Loop
acc_tot = 0
tot_cnt = 0
def train_loop(dataloader, model, loss_fn, optimizer):
    acc_tot = 0
    tot_cnt = 0
    size = len(dataloader.dataset)
    for batch, ( X, y) in enumerate(dataloader):
            y_loc = []
            # append 
            # append 9 0s to y_loc
            for i in range(9):
                y_loc.append(0)
            #print(y_loc, "---------------->")
            tot_loss = 0
            for i in range(8):
                # convert y_loc to tensor of shape 1*seq_le
                #print(y_loc)
                y_loc_tensor = torch.tensor(y_loc)
                # make it 2 dim
                y_loc_tensor = y_loc_tensor.unsqueeze(0)
                #print("In shape: ", y_loc_tensor.shape)
                output = model(X, y_loc_tensor)
                output = output.permute(1,0,2)
                preds = output.argmax(2)
                out_here = output[:, i+1, :]
                #print("Out here shape: ", out_here.shape)
                target = y[:, i+1]
                loss = loss_fn(out_here, target)
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()
                tot_loss += loss
            # back propogate total loss
            # tot_loss.backward()
            # # update parameters
            # optimizer.step()
            # # zero the gradients
            # optimizer.zero_grad()
            # calc acc based on preds and y
            preds = preds.squeeze(0)
            #print(preds.shape, y.shape)
            accuracy = (preds == y).sum()
            accuracy = accuracy.item()
            # divide acc by seq_len
            accuracy = accuracy/8
            print("Epoch: ", t+1, "Batch: ", batch+1, "Accuracy: ", accuracy, "Loss: ", tot_loss.item())

    
epochs = 10

for t in range(1):
    print(f"Epoch {t+1}\n-------------------------------")
    train_loop(train_dataloader, Model, criterion, optimizer)



Epoch 1
-------------------------------
Epoch:  1 Batch:  1 Accuracy:  0.25 Loss:  24.400714874267578
Epoch:  1 Batch:  2 Accuracy:  0.0 Loss:  29.97183609008789
Epoch:  1 Batch:  3 Accuracy:  0.0 Loss:  34.97869873046875
Epoch:  1 Batch:  4 Accuracy:  0.0 Loss:  26.695940017700195
Epoch:  1 Batch:  5 Accuracy:  0.0 Loss:  28.882400512695312
Epoch:  1 Batch:  6 Accuracy:  0.0 Loss:  30.11745834350586
Epoch:  1 Batch:  7 Accuracy:  0.125 Loss:  28.767696380615234
Epoch:  1 Batch:  8 Accuracy:  0.0 Loss:  29.706167221069336
Epoch:  1 Batch:  9 Accuracy:  0.125 Loss:  31.31991958618164
Epoch:  1 Batch:  10 Accuracy:  0.0 Loss:  28.47806739807129
Epoch:  1 Batch:  11 Accuracy:  0.125 Loss:  26.855194091796875
Epoch:  1 Batch:  12 Accuracy:  0.0 Loss:  29.770885467529297
Epoch:  1 Batch:  13 Accuracy:  0.0 Loss:  29.270078659057617
Epoch:  1 Batch:  14 Accuracy:  0.0 Loss:  26.626773834228516
Epoch:  1 Batch:  15 Accuracy:  0.375 Loss:  25.200061798095703
Epoch:  1 Batch:  16 Accuracy:  0.0

KeyboardInterrupt: 

In [50]:
def val_loop(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    acc_sum = 0
    cnt = 0
    with torch.no_grad():
        for X, y in dataloader:
            y_loc = []
            # append 
            # append 9 0s to y_loc
            for i in range(9):
                y_loc.append(0)
            print(y_loc, "---------------->")
            for i in range(8):
                # convert y_loc to tensor of shape 1*seq_le
                #print(y_loc)
                y_loc_tensor = torch.tensor(y_loc)
                # make it 2 dim
                y_loc_tensor = y_loc_tensor.unsqueeze(0)
                #print("In shape: ", y_loc_tensor.shape)
                output = model(X, y_loc_tensor)
                output = output.permute(1,0,2)
                #print("Out shape", output.shape)
                output = output.argmax(2)
                #print(output.shape, "----->")
                output =output[:, i+1]
                # append to y_loc
                val = output.item()
                y_loc[i+1] = val
                #print(y_loc, "------><<<<")
            print("done")
            y_loc = y_loc[1:]
val_loop(val_dataloader, Model, criterion)
            

[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 0, 0, 0, 0, 0, 0, 0, 0] ---------------->
done
[

KeyboardInterrupt: 

In [None]:
# for batch, (X, y) in enumerate(dataloader):
    #     # Compute the prediction and the loss
    #     pred = model(X, y)
    #     # reshape preds to 1 2 0
    #     pred = pred.permute(1, 2,0)
    #     #print(pred.shape, y.shape)
    #     loss = loss_fn(pred, y)
    #     # Backpropagation
    #     optimizer.zero_grad()
    #     loss.backward()
    #     optimizer.step()
    #     # calculate accuracy
    #     accuracy = (pred.argmax(1) == y).type(torch.float).sum().item()
    #     # divide by batch size and number of chars
    #     accuracy = accuracy/(y.shape[0]*y.shape[1])
    #     acc_tot += accuracy
    #     tot_cnt += 1
    #     acc_here = acc_tot/tot_cnt
    #     # decode the preds using argmax
    #     prediction = pred.argmax(1)
    #     acc_here = acc_tot/tot_cnt
    #     # Print the loss
    #     if batch % 100 == 0:
    #         loss, current = loss.item(), batch * len(X)
    #         # print epoch, loss, batch, acc_here
    #         print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}] accuracy: {acc_here:>7f}")

# Create a Validation Loop  
# def val_loop(dataloader, model, loss_fn):
#     size = len(dataloader.dataset)
#     num_batches = len(dataloader)
#     model.eval()
#     test_loss, correct = 0, 0
#     acc_sum = 0
#     cnt = 0
#     with torch.no_grad():
#         for X, y in dataloader:
#             pred = model(X, y)
#             pred = pred.permute(1, 2,0)
#             test_loss += loss_fn(pred, y).item()
#             accuracy = (pred.argmax(1) == y).type(torch.float).sum().item()
#             accuracy = accuracy/(y.shape[0]*y.shape[1])
#             acc_sum += accuracy
#             cnt += 1
#     test_loss /= num_batches
#     acc_here = acc_sum/cnt
#     print("Eval loop: loss: ", test_loss, " accuracy: ", acc_here)
            
# Train the Model

In [None]:
pred = model(X, y)
            pred = pred.permute(1, 2,0)
            test_loss += loss_fn(pred, y).item()
            accuracy = (pred.argmax(1) == y).type(torch.float).sum().item()
            accuracy = accuracy/(y.shape[0]*y.shape[1])
            acc_sum += accuracy
            cnt += 1
    test_loss /= num_batches
    acc_here = acc_sum/cnt
    print("Eval loop: loss: ", test_loss, " accuracy: ", acc_here)