## Importing Libraries 

In [1]:
import torch 
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import unicodedata
import string 
import re
import time 
from torch.autograd import Variable
import math 
import random
from sklearn.model_selection import train_test_split

In [2]:
device= torch.device("cuda")

## Data Loading  

In [3]:
trainData=torch.load('train_data_50_mut_1_c.pt')
trainLabel=torch.load('train_labels_50_mut_1_c.pt')
testData=torch.load('test_data_50_mut_1_c.pt')
testLabel=torch.load('test_labels_50_mut_1_c.pt')

In [73]:
training_words= 9608    #48042 # 90% for trianing 
test_words= 1068#5338 # 10% for testing

## Model Creation

In [67]:
# Encoder Architecture 
class EncoderLSTM(nn.Module):
    def __init__(self,input_size, hidden_size):
        super(EncoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding=nn.Embedding(input_size, hidden_size)
        self.lstm=nn.LSTM(hidden_size, hidden_size )
    
    def forward(self, x, h_init, c_init):
        
        g_seq=self.embedding(x)
        output, (h_final,c_final)=self.lstm(g_seq, (h_init, c_init))

        return output, h_final, c_final

In [68]:
# Decoder Architecture 
class DecoderLSTM(nn.Module):
    def __init__(self, hidden_size, output_size):
        super(DecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm=nn.LSTM(hidden_size, hidden_size)
        self.out=nn.Linear(hidden_size, output_size)

    def forward(self, x, h_init, c_init):
        output=self.embedding(x)
        output, (h_final, c_final)=self.lstm(output, (h_init, c_init))
        output=self.out(output) 
        return output, h_final, c_final

## Hyperparameters

In [74]:
# vocablulary size 26 charcaters +sow and eow tokens
hidden_size=256
vocab_size=26
encoder=EncoderLSTM(vocab_size,hidden_size).cuda()
decoder=DecoderLSTM(hidden_size,vocab_size).cuda()
my_lr=0.04
criterion=nn.CrossEntropyLoss()
SOW_token=0
EOW_token=27

EncoderLSTM(
  (embedding): Embedding(26, 256)
  (lstm): LSTM(256, 256)
)
EncoderLSTM(
  (embedding): Embedding(26, 256)
  (lstm): LSTM(256, 256)
)
DecoderLSTM(
  (embedding): Embedding(26, 256)
  (lstm): LSTM(256, 256)
  (out): Linear(in_features=256, out_features=26, bias=True)
)


## Utilities 

In [70]:
def get_error( scores , labels ):

    bs=scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_matches=indicator.sum()
    num_words=num_matches/len(predicted_labels)
    return num_words.item()


def save_model_parameters():
# save models 
    torch.save({
                'epoch': epoch,
                'encoder_model': encoder.state_dict(),
                'decoder_model':decoder.state_dict(),
                'encoder_optimizer': encoder_optimizer.state_dict(),
                'decoder_optimizer': decoder_optimizer.state_dict(),
                'train_loss': loss_plt, 'train_accuracy': accu_plt, 
                'test_loss': test_loss_plt,
                'test_accuracy': accuracy_test_plt,
                'confusion matrix_parameters':cnt_list },  'seq_seq_model_sinlgle_char_50_mutation.pt')
#                 'incorrect_to_correct':cnt_list[0],  'correct_to_correct':cnt_list[2],
#                 'correct_to_incorrect':cnt_list[1],'regenerate':cnt_list[3] 
                  
def confusion_parameters(scores,target_tensor,inpute_tensor):
    
    conf_counter= torch.zeros(4)

  
    if torch.all(torch.eq(target_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making incorrect->correct
        conf_counter[0] +=1

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==0 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->incorrect

        conf_counter[1] +=1    


    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->correct

        conf_counter[2] +=1                                                               

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making inccorrect->correct or regenerate the input 

        conf_counter[3] +=1  
    return conf_counter


## Evaluation

In [71]:
def eval_on_test_set():
    encoder.eval()
    decoder.eval()
    loss_word=0
    run_error=0.0
    loss_plt=[]
    acc_plt=[]
    # counts for computing correct and incorrect proportion
    count=torch.zeros(4)
    cnf_mtx_count=torch.zeros(4)
    
    for count in range(test_words):
        loss_char=0
        input_tensor = testData[count].cuda()
        target_tensor= testLabel[count].cuda()

        # input and target words length
        input_length = input_tensor.size(0)
        # checkking the correct accuracy
        target_length = target_tensor.size(0)

        #initial hidden states
        encoder_h = Variable(torch.zeros(1, 1,hidden_size))
        encoder_c = Variable(torch.zeros(1, 1,hidden_size))

        #send to GPU
        encoder_h=encoder_h.to(device)
        encoder_c=encoder_c.to(device)

        #encoding
        encoder_out,encoder_h,encoder_c = encoder(input_tensor.view(input_length,1), encoder_h, encoder_c)

        # transfering feature vector from encoder to decoder
        decoder_h = encoder_h 
        decoder_c = encoder_c

        #initial input decoder
        decoder_input=torch.LongTensor([SOW_token]).cuda()

        # decoding
        # outputs t ozero
        outputs = torch.zeros(input_length, vocab_size).to(device)

       
        for dc in range(input_length):
            decoder_out, deocder_h, decoder_c= decoder(decoder_input.view(1,1),decoder_h,decoder_c)

            outputs[dc]=decoder_out
            top1 = decoder_out[0][0].argmax()
            decoder_input=top1
            loss_char += criterion(decoder_out.view(1,vocab_size),target_tensor[dc].unsqueeze(0)) 
        
        run_error+=get_error( outputs , target_tensor)
        loss_word+= loss_char/input_length 
        
         #=======================================================#
        # compute confusion paramters 
        cnf_mtx_count+=confusion_parameters(outputs,target_tensor,input_tensor)
        #============================================# 
       
         ###====Different Metrics for evaluation=============###
    
    # counter for computing confusion matrix

    
    # accuracy and loss 
    total_loss = loss_word/test_words
    loss_plt.append(total_loss)
    accuracy= (run_error/test_words)*100
    acc_plt.append(accuracy)
    print('test::: (loss) = ', (total_loss.item()) ,'\t (accuracy)=' , (accuracy),'%')
    return loss_plt, acc_plt, cnf_mtx_count

## Training 

In [75]:
start=time.time()
teacher_forcing_ratio = 0.4
# inilize variables for saving losses and accuracies 

accu_plt=[]
loss_plt=[]
test_loss_plt=[]
accuracy_test_plt=[]
cnt_list=[]
for epoch in range(1,1000):
    encoder.train()
    decoder.train()
    # divide the learning rate by 1.1 each 10 epoch
    if epoch %10==0:
        my_lr = my_lr / 1.1
#         torch.save(encoder, 'encoder_v2.pth') 
#         torch.save(decoder, 'decoder_v2.pth') 
    # optimizer
    encoder_optimizer =torch.optim.SGD(encoder.parameters(), lr=my_lr)
    decoder_optimizer =torch.optim.SGD(decoder.parameters(), lr=my_lr)
    
    #clear the loss for every epoch 
    total_loss=0
    loss=0
    loss_word=0
    loss_char=0
    run_error=0.0
    
    # chooosing diffrent samples of data every epoch
    
    for count in range(training_words):
        input_tensor = trainData[count].cuda()
        target_tensor= trainLabel[count].cuda()
       

        # input and target words length
        input_length  = input_tensor.size(0)
        target_length = target_tensor.size(0)
        
        #initial hidden states
        encoder_h = Variable(torch.zeros(1, 1,hidden_size))
        encoder_c = Variable(torch.zeros(1, 1,hidden_size))

        #send to GPU
        encoder_h=encoder_h.to(device)
        encoder_c=encoder_c.to(device)

        # set the gradient values to zero 
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        # Applying encoder on every character of the word  
        # for ec in range(input_length):

        encoder_out,encoder_h,encoder_c = encoder(input_tensor.view(input_length,1), encoder_h, encoder_c)


        # applying output hidden feature of encoder as input hidden feature to decoder
        decoder_h = encoder_h 
        decoder_c = encoder_c
        
        # Delivering output feature vector of the encoder into the decoder
        decoder_input=torch.LongTensor([SOW_token]).cuda()
        outputs = torch.zeros(input_length, vocab_size).to(device)
        # Decoding Part
        use_teacher_forcing = True if random.random() < teacher_forcing_ratio else False
        outputs = torch.zeros(input_length, vocab_size).to(device)

        if use_teacher_forcing:
        # Applying Teacher forcing for the by feeding the target ouput as the next input
        # for dc in range(target_length):
            for dc in range(input_length):
                decoder_out, deocder_h, decoder_c= decoder(decoder_input.view(1,1),decoder_h,decoder_c)

                outputs[dc]=decoder_out
                top1 = decoder_out[0][0].argmax()
                decoder_input=target_tensor[dc]
                loss+= criterion(decoder_out.view(1,vocab_size),target_tensor[dc].unsqueeze(0))   # applying teacher forcing
            run_error+=get_error( outputs , target_tensor)
        else: 
            for dc in range(input_length):
                decoder_out, deocder_h, decoder_c= decoder(decoder_input.view(1,1),decoder_h,decoder_c)

                outputs[dc]=decoder_out
                top1 = decoder_out[0][0].argmax()
                decoder_input=top1
                loss+= criterion(decoder_out.view(1,vocab_size),target_tensor[dc].unsqueeze(0)) 
            run_error+=get_error( outputs , target_tensor)
        # backpropagate the average loss of one word
        loss.backward()
        
        # update the weights every word
        encoder_optimizer.step()
        decoder_optimizer.step()
        loss= loss.item()/input_length
        loss_char+= loss # returning the loss across one word
        
    total_loss=loss_char/training_words
    accuracy= (run_error/training_words)*100
    accu_plt.append(accuracy)
    loss_plt.append(total_loss)
    elapsed = time.time()-start
    # print('')
    print('Train:::', 'epoch=',epoch,'\t lr=', my_lr, '\t (loss)=',(total_loss),'\t (accuracy)=' , (accuracy),'%','\t time=', elapsed)
    
    # evaluate on test set to monitor the loss 
    loss_tst_plt, acc_tst_plt,confusion_param =eval_on_test_set()
    
    # saving test parameters  
    test_loss_plt.append(loss_tst_plt)
    accuracy_test_plt.append(acc_tst_plt)
    #### Saving model parameters every epoch  
#         # counter for computing confusion matrix
    cnt_list.append(confusion_param)

    
    save_model_parameters()

Train::: epoch= 1 	 lr= 0.04 	 (loss)= 0.17199699144068026 	 (accuracy)= 92.64154870940882 % 	 time= 141.53442645072937
test::: (loss) =  6.010216236114502 	 (accuracy)= 0.7490636704119851 %
Train::: epoch= 2 	 lr= 0.04 	 (loss)= 0.12507828081167174 	 (accuracy)= 93.61990008326396 % 	 time= 289.28952980041504
test::: (loss) =  5.488241672515869 	 (accuracy)= 1.8726591760299627 %
Train::: epoch= 3 	 lr= 0.04 	 (loss)= 0.10933578504959705 	 (accuracy)= 94.4525395503747 % 	 time= 434.35473346710205
test::: (loss) =  5.096492767333984 	 (accuracy)= 2.0599250936329585 %
Train::: epoch= 4 	 lr= 0.04 	 (loss)= 0.09487209790487784 	 (accuracy)= 94.85845129059118 % 	 time= 579.6163506507874
test::: (loss) =  5.028802871704102 	 (accuracy)= 2.3408239700374533 %
Train::: epoch= 5 	 lr= 0.04 	 (loss)= 0.08048147634310013 	 (accuracy)= 95.16028309741881 % 	 time= 725.3104503154755
test::: (loss) =  5.318463325500488 	 (accuracy)= 2.4344569288389515 %
Train::: epoch= 6 	 lr= 0.04 	 (loss)= 0.0786568

test::: (loss) =  0.6103612780570984 	 (accuracy)= 84.8314606741573 %
Train::: epoch= 42 	 lr= 0.02732053821460282 	 (loss)= 0.007147632696620369 	 (accuracy)= 99.56286427976686 % 	 time= 6117.595319986343
test::: (loss) =  0.5800907611846924 	 (accuracy)= 85.39325842696628 %
Train::: epoch= 43 	 lr= 0.02732053821460282 	 (loss)= 0.006720383517155499 	 (accuracy)= 99.68776019983348 % 	 time= 6264.2494394779205
test::: (loss) =  0.5573002696037292 	 (accuracy)= 87.17228464419475 %
Train::: epoch= 44 	 lr= 0.02732053821460282 	 (loss)= 0.00475455230929005 	 (accuracy)= 99.67735220649459 % 	 time= 6411.304867506027
test::: (loss) =  0.5012567639350891 	 (accuracy)= 88.38951310861424 %
Train::: epoch= 45 	 lr= 0.02732053821460282 	 (loss)= 0.005969518075878132 	 (accuracy)= 99.65653621981681 % 	 time= 6558.417688846588
test::: (loss) =  0.5042306184768677 	 (accuracy)= 88.38951310861424 %
Train::: epoch= 46 	 lr= 0.02732053821460282 	 (loss)= 0.00508055113972569 	 (accuracy)= 99.6669442131

KeyboardInterrupt: 

### Loading model tutorial

In [76]:
# # loading model 
checkpoint = torch.load('seq_seq_model_sinlgle_char_50_mutation.pt')
encoder.load_state_dict(checkpoint['encoder_model'])
decoder.load_state_dict(checkpoint['decoder_model'])
encoder_optimizer.load_state_dict(checkpoint['encoder_optimizer'])
decoder_optimizer.load_state_dict(checkpoint['decoder_optimizer'])
epoch = checkpoint['epoch']
train_loss = checkpoint['train_loss']
test_loss = checkpoint['test_loss']
train_acc = checkpoint['train_accuracy']
test_acc = checkpoint['test_accuracy']
# model.eval()