In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
data = pd.read_csv('train_data.csv')
data = data.values

X = data[:, 0]
Y = data[:, 1]

X = np.array([np.array([ord(c) - ord('a') + 1 for c in s]) for s in X])
Y = np.array([np.array([ord(c) - ord('a') + 1 for c in s]) for s in Y])
Y = np.array([np.append([0], y) for y in Y])

# Load Test data
test_data = pd.read_csv('eval_data.csv')
test_data = test_data.values

X_test = test_data[:,0]
Y_test = test_data[:,1]

X_test = np.array([np.array([ord(c) - ord('a') + 1 for c in s]) for s in X_test])
Y_test = np.array([np.array([ord(c) - ord('a') + 1 for c in s]) for s in Y_test])
Y_test = np.array([np.append([0], y) for y in Y_test])

# convert to torch tensors
X = torch.tensor(X)
Y = torch.tensor(Y)
X_test = torch.tensor(X_test)
Y_test = torch.tensor(Y_test)

print(X.shape, Y.shape, X_test.shape, Y_test.shape)

torch.Size([7000, 8]) torch.Size([7000, 9]) torch.Size([2000, 8]) torch.Size([2000, 9])


In [3]:
# Positional Encoding
import math
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=7000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).view(-1,1)  
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)
        
    def forward(self, x: torch.tensor)->torch.tensor:
        return self.dropout(x + self.pe[:x.size(0), :])

In [4]:
# Transformer Model using nn.Transformer
class TransformerModel(nn.Module):
    def __init__(self, n_tokens, d_model, n_heads, n_encoder_layers, n_decoder_layers, dropout):
        super().__init__()
        self.model_type = 'Transformer'
        self.d_model = d_model
        # layers
        self.embedding = nn.Embedding(n_tokens, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.transformer = nn.Transformer(d_model, n_heads, n_encoder_layers, n_decoder_layers, dropout=dropout, batch_first=True)
        self.decoder = nn.Linear(d_model, n_tokens)
        # apply softmax to output
        self.softmax = nn.LogSoftmax(dim=2)

    def forward(self, src, tgt):
        src = self.embedding(src) * math.sqrt(self.d_model)
        src = self.pos_encoder(src)
        tgt = self.embedding(tgt) * math.sqrt(self.d_model)
        tgt = self.pos_encoder(tgt)
        output = self.transformer(src, tgt)
        output = self.decoder(output)
        output = self.softmax(output)
        return output

    def predict(model,input_str):
        model.eval()
        output = ''
        input_str = np.array([np.array([ord(c) - ord('a') + 1 for c in input_str])])
        src = torch.tensor(input_str)
        tgt = torch.tensor([[0]])
        for _ in range(8):
            pred = model(src, tgt)
            pred = pred.argmax(dim=2)
            output += chr(pred[0, -1] + ord('a') - 1)
            tgt = torch.cat((tgt, pred[:, -1].unsqueeze(0)), dim=1)
        return output
        

In [6]:
# Hyperparameters
n_tokens = 27
d_model = 128
n_heads = 8
n_encoder_layers = 2
n_decoder_layers = 2
dropout = 0.2
batch_size = 10
epochs = 300
lr = 0.001

# model
model = TransformerModel(n_tokens, d_model, n_heads, n_encoder_layers, n_decoder_layers, dropout)

# loss function
criterion = nn.CrossEntropyLoss()

# optimizer-adam
optimizer = optim.Adam(model.parameters(), lr=lr)

In [8]:
def train(model, X, Y, criterion, optimizer, epochs, batch_size):
    model.train()
    losses = []
    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(X), batch_size):
            x = X[i:i+batch_size]
            y = Y[i:i+batch_size]
            optimizer.zero_grad()
            output = model(x, y[:, :-1])
            y_expected = np.array([np.eye(27)[t] for t in y[:, 1:]])
            y_expected = torch.tensor(y_expected, dtype=torch.float)
            output1 = output.permute(0, 2, 1) 
            y_expected1 = y_expected.permute(0, 2, 1)
            # print(output1.shape, y_expected1.shape)
            loss = criterion(output1, y_expected1)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        # loss over all batches
        n_batches = len(X) // batch_size
        losses.append(total_loss / n_batches)
        print(f'Epoch: {epoch+1}/{epochs} Loss: {total_loss / n_batches}')
    # plot loss per epoch
    plt.plot(losses)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.show() 
    return losses, model

losses, model = train(model, X, Y, criterion, optimizer, epochs, batch_size)
# save model
# torch.save(model.state_dict(), 'model.pth')

Epoch: 1/300 Loss: 1.9614891741956983
Epoch: 2/300 Loss: 1.9641381398269109
Epoch: 3/300 Loss: 1.9567024307591574
Epoch: 4/300 Loss: 1.957624133314405
Epoch: 5/300 Loss: 1.9571417348725455
Epoch: 6/300 Loss: 1.9580325203282491
Epoch: 7/300 Loss: 1.9567292162350245
Epoch: 8/300 Loss: 1.9594153261184692
Epoch: 9/300 Loss: 1.9480813368729182
Epoch: 10/300 Loss: 1.9578615180083683
Epoch: 11/300 Loss: 1.9502811910424913
Epoch: 12/300 Loss: 1.9502758511475153
Epoch: 13/300 Loss: 1.9455712815693447
Epoch: 14/300 Loss: 1.9374895613534109
Epoch: 15/300 Loss: 1.9370778019087656
Epoch: 16/300 Loss: 1.951303196293967
Epoch: 17/300 Loss: 1.944968852996826
Epoch: 18/300 Loss: 1.934271845306669
Epoch: 19/300 Loss: 1.9374632377283914
Epoch: 20/300 Loss: 1.9331065048490252
Epoch: 21/300 Loss: 1.941590222631182
Epoch: 22/300 Loss: 1.9356689584255218
Epoch: 23/300 Loss: 1.9336652980531965
Epoch: 24/300 Loss: 1.931119247164045
Epoch: 25/300 Loss: 1.9251737335750034
Epoch: 26/300 Loss: 1.9205694251401084
E

KeyboardInterrupt: 

In [None]:
# loading the saved model
# model = TransformerModel(n_tokens, d_model, n_heads, n_encoder_layers, n_decoder_layers, dropout)
# model.load_state_dict(torch.load('model.pth'))
# model.eval()

In [None]:
def predict(model,input_str):
    model.eval()
    output = ''
    input_str = np.array([np.array([ord(c) - ord('a') + 1 for c in input_str])])
    src = torch.tensor(input_str)
    tgt = torch.tensor([[0]])
    for _ in range(8):
        pred = model(src, tgt)
        pred = pred.argmax(dim=2)
        output += chr(pred[0, -1] + ord('a') - 1)
        tgt = torch.cat((tgt, pred[:, -1].unsqueeze(0)), dim=1)
    return output

In [None]:
print(predict(model, 'dcmdllti'))

ppbbbbbb


In [20]:
# Function to check how many characters match in the two strings
def check(pred: str, true: str):
    correct = 0
    for a, b in zip(pred, true):
        if a == b:
            correct += 1
    return correct

# Function to score the model's performance
def evaluate(model):
    print("Obtaining metrics for eval data:")
    eval_data = pd.read_csv("eval_data.csv").to_numpy()
    results = {
        "pred": [],
        "true": [],
        "score": [],
    }
    correct = [0 for _ in range(9)]
    for x, y in eval_data:
        pred = predict(model, x)
        print(f"Predicted: {pred}, True: {y}")
        score = check(pred, y)
        results["pred"].append(pred)
        results["true"].append(y)
        results["score"].append(score)

        correct[score] += 1
    print("Eval dataset results:")
    for num_chr in range(9):
        print(
            f"Number of predictions with {num_chr} correct predictions: {correct[num_chr]}"
        )
    points = sum(correct[4:6]) * 0.5 + sum(correct[6:])
    marks = round(min(2, points / 1400 * 2) * 2) / 2  # Rounds to the nearest 0.5
    print(f"Points: {points}")
    print(f"Marks: {marks}")
    # Save predicitons and true sentences to inspect manually if required.
    pd.DataFrame.from_dict(results).to_csv("results_eval.csv", index=False)


evaluate(model)


Obtaining metrics for eval data:
Predicted: yanizsda, True: ldlhgjuj
Predicted: yanizsda, True: mffolhhl
Predicted: psminhli, True: xjwbqnnq
Predicted: yanizmgh, True: nqxwxmtb
Predicted: psminhli, True: dlbdbgvx
Predicted: psminhli, True: fwkibsou
Predicted: yanizmgh, True: ulafntih
Predicted: yanizsda, True: ontitdlb
Predicted: psminhli, True: epprjfot
Predicted: yanizmgh, True: sdgztwup
Predicted: psminhli, True: sqcdadyx
Predicted: psminhli, True: ojsllopa
Predicted: yanizmgh, True: zdtsndhk
Predicted: psminhli, True: kiwtuwyj
Predicted: psminhli, True: jxfwiaky
Predicted: yanizmgh, True: cxqzjrox
Predicted: yanizsda, True: vyakrkdv
Predicted: psminhli, True: rfoaeevr
Predicted: yqzanopr, True: bgbcvwei
Predicted: yanizsda, True: yrozhdru
Predicted: yqzanopr, True: gnulhwmv
Predicted: yanizsdz, True: kflttcgt
Predicted: yanizsda, True: tlcmfsqf
Predicted: yanizsda, True: kwnizswj
Predicted: psminhli, True: ebyojfqs
Predicted: psminhli, True: eyfqqwxb
Predicted: psminhli, True: prgz

KeyboardInterrupt: 