In [1]:
import torch 
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np 
import time 
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import random_split
import math
import string

from global_defs import *

In [2]:
train_data_1=torch.load('train_data_50_mut_1_c.pt')
test_data_1=torch.load('test_data_50_mut_1_c.pt')
train_labels_1=torch.load('train_labels_50_mut_1_c.pt')
test_labels_1=torch.load('test_labels_50_mut_1_c.pt')

In [3]:
num_train_words=len(train_data_1)
num_test_words=len(test_data_1)

In [4]:
num_train_words=48042 # 1000 words 50 misspilled single mistake 
num_test_words=5338 

In [6]:
class Recurrent_Layer(nn.Module):
    
    def __init__(self, vocab_size, hidden_size):
        super(Recurrent_Layer, self).__init__()
        

        self.layer1 = nn.Embedding (vocab_size, hidden_size)
        self.lstm=nn.LSTM( hidden_size, hidden_size) #bidirectional=True
#         self.dropout=nn.Dropout(0.2)
        self.fc=nn.Linear( hidden_size, vocab_size)

    def forward(self, x, h_init,c_init):

        g_seq          =self.layer1(x)
        h_seq, (h_final, c_final) =self.lstm(g_seq, (h_init,c_init))
#         h_seq=self.dropout(h_seq)
        score_seq      =self.fc(h_seq)
        
        return score_seq,  h_final , c_final

In [7]:
hidden_size=256
vocab_size =26
# word_length= 19 #  according to the data 
batch_of_words=1

lstm_net=Recurrent_Layer(vocab_size,hidden_size).cuda()

lstm_net = lstm_net.to(device)
print(lstm_net)
criterion = nn.CrossEntropyLoss()
my_lr=0.05
accu_plt=[]
loss_plt=[]
test_loss_plt=[]
accuracy_test_plt=[]

Recurrent_Layer(
  (layer1): Embedding(26, 256)
  (lstm): LSTM(256, 256)
  (fc): Linear(in_features=256, out_features=26, bias=True)
)


## Utilities

In [10]:
def get_error( scores , labels ):

    bs=scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_matches=indicator.sum()
    corr=num_matches/len(predicted_labels)
    return corr.item()

def save_model_parameters():
# save models 
    torch.save({
                'epoch': epoch,
                'lstm_model':lstm_net.state_dict(),
                'lstm_optimizer': lstm_optimizer.state_dict(),
                'train_loss': loss_plt, 'train_accuracy': accu_plt, 
                'test_loss': test_loss_plt,
                'test_accuracy': accuracy_test_plt, 'confusion matrix_parameters':confusion_mtx_parameters  }, 'lstm_single_Basic Model.pt')
#                 'incorrect_to_correct':confusion_mtx_parameters[0],  'correct_to_correct':confusion_mtx_parameters[2],
#                 'correct_to_incorrect':confusion_mtx_parameters[1],'regenerate':confusion_mtx_parameters[3]
  
           
               
     
        #print(in_word, out_word)
        
def confusion_parameters(scores,target_tensor,inpute_tensor,conf_counter):

    
    if torch.all(torch.eq(target_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making incorrect->correct
        conf_counter[0] +=1

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==0 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->incorrect

        conf_counter[1] +=1    


    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->correct

        conf_counter[2] +=1                                                               

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making inccorrect->correct or regenerate the input 

        conf_counter[3] +=1  
    return conf_counter




## Evaluation

In [11]:
def eval_on_test_set():
    # to deactivate dropout regularization during testing
    lstm_net.eval()
    running_loss=0
    num_batches=0    
    num_matches=0 
    acc_plt=[]
    loss_plt=[]
    
    # counts for computing correct and incorrect proportion
    count=torch.zeros(4)
    cnt_list=torch.zeros(4)
    cnf_mtx_count=torch.zeros(4)
    
    for i in range(0,num_test_words):
        
        h = torch.zeros(1,batch_of_words,  hidden_size).cuda()
        c = torch.zeros(1,batch_of_words,  hidden_size).cuda()
        h=h.to(device)
        c=c.to(device)
        word_length= len(test_data_1 [i])
        
        inpute_tensor=  test_data_1[i].cuda()
        target_tensor =  test_labels_1[i].cuda()
        
        # sending to GPU
        inpute_tensor=inpute_tensor.to(device)
        target_tensor=target_tensor.to(device)
        # forward pass
        scores_char, h, c= lstm_net(inpute_tensor.view(word_length,1), h, c)
        # reshape before calculating the loss for easier slicing of mini batch of words
        scores_char = scores_char.view(word_length*batch_of_words,vocab_size)
        target_tensor = target_tensor.contiguous()
        target_tensor = target_tensor.view(word_length*batch_of_words)
        # calculating the loss of batch of character sthat constrcut a words
        loss_char= criterion(scores_char, target_tensor)# do we need to add the loss for every 
        
            #================================================#
        # accumalate the loss
        running_loss+= loss_char.item()
        num_batches+=1 
        # computing accuracy
        num_matches+= get_error(scores_char, target_tensor)
        #=======================================================#
        # compute confusion paramters 
        cnf_mtx_count+=confusion_parameters(scores_char,target_tensor,inpute_tensor,count)
        #============================================# 
        

    ###====Different Metrics for evaluation=============###
    
    # counter for computing confusion matrix
#     cnt_list[0]+=cnf_mtx_count[0].item()
#     cnt_list[1]+=cnf_mtx_count[1].item()
#     cnt_list[2]+=cnf_mtx_count[2].item()
#     cnt_list[3]+=cnf_mtx_count[3].item()
  
    # accuracy and loss 
    accuracy= (num_matches/num_test_words)*100
    acc_plt.append(accuracy)
    total_loss = running_loss/num_batches 
    loss_plt.append(total_loss)
    
    # printing results 
    print('Test==: loss = ',(total_loss),'\t accuracy=', accuracy,'%' )
    return acc_plt, loss_plt, cnf_mtx_count

## Training 

In [56]:
eval_on_test_set()

Test==: loss =  0.6082124173641205 	 accuracy= 20.0 %


([20.0], [0.6082124173641205], tensor([9., 1., 6., 0.]))

In [12]:
start=time.time()
# shuff_index=torch.LongTensor(485224).random_(0,485224)
# train_data_2=train_data_1[:,shuff_index]
# train_labels_2=train_labels_1[:,shuff_index]

accu_plt=[]
loss_plt=[]
test_loss_plt=[]
accuracy_test_plt=[]
confusion_mtx_parameters=[]

for epoch in range(1,1000):
    # to activate dropout during training 
    lstm_net.train()
    # divide the learning rate by 3 except after the first epoch
    if epoch % 10==0:
        my_lr = my_lr / 1.1

    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    lstm_optimizer=torch.optim.SGD( lstm_net.parameters() , lr=my_lr )
    # set the initial h and c to be the zero vector

        
    # set the running quatities to zero at the beginning of the epoch
    running_loss=0
    num_batches=0    
    num_matches=0
    
    # loop across batch of words
    for i in range(0,num_train_words):
      # initilize the hidden state every word as the words ara independent 
    
        h = torch.zeros(1,batch_of_words,  hidden_size).cuda()
        c = torch.zeros(1,batch_of_words,  hidden_size).cuda()
        h=h.to(device)
        c=c.to(device)
    
        word_length= len(train_data_1 [i])
    # Set the gradients to zeros
        lstm_optimizer.zero_grad()

        minibatch_words  =  train_data_1[i ]
        minibatch_labels =  train_labels_1[ i]
        # sending to GPU
        minibatch_words=minibatch_words.to(device)
        minibatch_labels=minibatch_labels.to(device)


        # Detach to prevent from backpropagating all the way to the beginning
        # Then tell Pytorch to start tracking all operations that will be done on h and c
        h=h.detach()
        c=c.detach()
        h=h.requires_grad_()
        c=c.requires_grad_()

        # forward pass
        scores_char, h, c = lstm_net(minibatch_words.view(word_length,1), h, c)

        # reshape before calculating the loss for easier slicing of mini batch of words
        scores_char = scores_char.view(  word_length*batch_of_words , vocab_size)
        minibatch_labels = minibatch_labels.contiguous()
        minibatch_labels = minibatch_labels.view(word_length*batch_of_words )
        # calculating the loss of batch of character sthat constrcut a words
        loss_char= criterion(scores_char, minibatch_labels)

        #===============================================

        # summation of both lossses to 
        combined_loss=loss_char


        # backward pass to compute dL/dR, dL/dV and dL/dW
        combined_loss.backward()

        # update the wights 
        lstm_optimizer.step()


        # update the running loss  
        running_loss += combined_loss.detach().item()
        num_batches += 1
        num_matches += get_error(scores_char, minibatch_labels)
        
        # end of iteration 
        
    # compute for full training set
    total_loss = running_loss/num_batches
    loss_plt.append(total_loss)
    accuracy= (num_matches/num_train_words)*100
    accu_plt.append(accuracy)
    # Compute the time 
    elapsed = time.time()-start
    
    print('')
    print('Train:::', 'epoch=',epoch,'\t lr=', my_lr, '\t (loss)=',(total_loss),'\t (accuracy)=' , (accuracy),'%','\t time=', elapsed)
    
    # evaluate on test set to monitor the loss 
    acc_tst_plt, loss_tst_plt, confusion_param=eval_on_test_set()
    
    # saving test parameters  
    test_loss_plt.append(loss_tst_plt)
    accuracy_test_plt.append(acc_tst_plt)
    confusion_mtx_parameters.append(confusion_param)
    #### Saving model parameters every epoch 
    save_model_parameters() 
        


Train::: epoch= 1 	 lr= 0.05 	 (loss)= 0.19623494833344096 	 (accuracy)= 63.76920194829524 % 	 time= 237.37316846847534
Test==: loss =  1.7910580363430106 	 accuracy= 0.9554140127388535 %

Train::: epoch= 2 	 lr= 0.05 	 (loss)= 0.1517772299977759 	 (accuracy)= 71.13775446484325 % 	 time= 494.4772083759308
Test==: loss =  1.8183493763665937 	 accuracy= 1.2551517422255525 %

Train::: epoch= 3 	 lr= 0.05 	 (loss)= 0.1447985073098206 	 (accuracy)= 72.27842304650099 % 	 time= 734.1031839847565
Test==: loss =  1.5837755915668494 	 accuracy= 1.6860247283626826 %

Train::: epoch= 4 	 lr= 0.05 	 (loss)= 0.13700752401204647 	 (accuracy)= 73.59185712501561 % 	 time= 898.3399035930634
Test==: loss =  1.617262648749734 	 accuracy= 1.798426376920195 %

Train::: epoch= 5 	 lr= 0.05 	 (loss)= 0.13228594369850588 	 (accuracy)= 74.54935264976478 % 	 time= 1158.6797499656677
Test==: loss =  1.5870880829217127 	 accuracy= 2.341701011614837 %

Train::: epoch= 6 	 lr= 0.05 	 (loss)= 0.12496166629831734 	 (

Test==: loss =  1.4456238837407545 	 accuracy= 5.001873360809292 %

Train::: epoch= 43 	 lr= 0.03415067276825353 	 (loss)= 0.04221329948381605 	 (accuracy)= 90.72270096998459 % 	 time= 11136.974091291428
Test==: loss =  1.4665848931223138 	 accuracy= 5.001873360809292 %

Train::: epoch= 44 	 lr= 0.03415067276825353 	 (loss)= 0.04188556457186938 	 (accuracy)= 90.88714041880021 % 	 time= 11396.205334663391
Test==: loss =  1.4324966244434256 	 accuracy= 5.039340576995129 %

Train::: epoch= 45 	 lr= 0.03415067276825353 	 (loss)= 0.041110777096126164 	 (accuracy)= 91.00162357936806 % 	 time= 11654.50598192215
Test==: loss =  1.4281510243512656 	 accuracy= 4.983139752716373 %

Train::: epoch= 46 	 lr= 0.03415067276825353 	 (loss)= 0.04105228658880757 	 (accuracy)= 90.89754797885183 % 	 time= 11913.10919046402
Test==: loss =  1.4236699732366545 	 accuracy= 5.039340576995129 %

Train::: epoch= 47 	 lr= 0.03415067276825353 	 (loss)= 0.041343583701011564 	 (accuracy)= 90.89754797885183 % 	 time=


Train::: epoch= 83 	 lr= 0.02332536901048666 	 (loss)= 0.04608401738402703 	 (accuracy)= 89.56329878023396 % 	 time= 21414.329174041748
Test==: loss =  1.3751255180756985 	 accuracy= 5.02060696890221 %

Train::: epoch= 84 	 lr= 0.02332536901048666 	 (loss)= 0.045794299804838456 	 (accuracy)= 89.6861079888431 % 	 time= 21673.379717111588
Test==: loss =  1.3637913279448632 	 accuracy= 4.983139752716373 %

Train::: epoch= 85 	 lr= 0.02332536901048666 	 (loss)= 0.045959883027646545 	 (accuracy)= 89.62782565255402 % 	 time= 21933.178502559662
Test==: loss =  1.3358917743203014 	 accuracy= 4.964406144623455 %

Train::: epoch= 86 	 lr= 0.02332536901048666 	 (loss)= 0.045847598629600424 	 (accuracy)= 89.55080970817203 % 	 time= 22192.35643339157
Test==: loss =  1.3224688436811365 	 accuracy= 5.001873360809292 %

Train::: epoch= 87 	 lr= 0.02332536901048666 	 (loss)= 0.04615826848019319 	 (accuracy)= 89.55080970817203 % 	 time= 22452.343126296997
Test==: loss =  1.3348343094356998 	 accuracy= 

Test==: loss =  1.3874947593185591 	 accuracy= 5.02060696890221 %

Train::: epoch= 124 	 lr= 0.01593154088551783 	 (loss)= 0.048548878792425296 	 (accuracy)= 88.82228050455852 % 	 time= 32026.012152910233
Test==: loss =  1.3939001117530678 	 accuracy= 5.058074185088048 %

Train::: epoch= 125 	 lr= 0.01593154088551783 	 (loss)= 0.04910427812771777 	 (accuracy)= 88.74734607218684 % 	 time= 32283.07087945938
Test==: loss =  1.3620157643379587 	 accuracy= 5.076807793180967 %

Train::: epoch= 126 	 lr= 0.01593154088551783 	 (loss)= 0.04921397689949556 	 (accuracy)= 88.67033012780485 % 	 time= 32543.19270515442
Test==: loss =  1.366653224995078 	 accuracy= 5.058074185088048 %

Train::: epoch= 127 	 lr= 0.01593154088551783 	 (loss)= 0.04944863724598198 	 (accuracy)= 88.72861246409391 % 	 time= 32799.48238158226
Test==: loss =  1.3162513921844692 	 accuracy= 5.058074185088048 %

Train::: epoch= 128 	 lr= 0.01593154088551783 	 (loss)= 0.0500941773204134 	 (accuracy)= 88.55376545522667 % 	 time=


Train::: epoch= 164 	 lr= 0.010881456789507428 	 (loss)= 0.05629158294260804 	 (accuracy)= 87.30485824903211 % 	 time= 42338.51597738266
Test==: loss =  1.1089523091798268 	 accuracy= 5.095541401273886 %

Train::: epoch= 165 	 lr= 0.010881456789507428 	 (loss)= 0.056003795613115664 	 (accuracy)= 87.30902127305275 % 	 time= 42591.95710372925
Test==: loss =  1.1047459091841842 	 accuracy= 5.076807793180967 %

Train::: epoch= 166 	 lr= 0.010881456789507428 	 (loss)= 0.05565887520914489 	 (accuracy)= 87.36105907331086 % 	 time= 42848.29813885689
Test==: loss =  1.1166502454288032 	 accuracy= 5.058074185088048 %

Train::: epoch= 167 	 lr= 0.010881456789507428 	 (loss)= 0.05612724027867118 	 (accuracy)= 87.31942883310437 % 	 time= 43101.96578454971
Test==: loss =  1.1067007535101328 	 accuracy= 5.058074185088048 %

Train::: epoch= 168 	 lr= 0.010881456789507428 	 (loss)= 0.056535927425873926 	 (accuracy)= 87.2965322009908 % 	 time= 43359.58807039261
Test==: loss =  1.0585420000273034 	 accu


Train::: epoch= 204 	 lr= 0.0074321814012071755 	 (loss)= 0.06722536387524683 	 (accuracy)= 85.32534032721368 % 	 time= 52605.07617545128


KeyboardInterrupt: 

In [33]:
print(confusion_mtx_parameters)

[[[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0], [1.0], [0.0], []], [[1.0], [1.0]

In [None]:
checkpoint = torch.load('lstm_single_char_50_mutation_bid_drop.pt')

In [10]:
torch.save(lstm_net, 'this_morning_w_do.pth')

  "type " + obj.__name__ + ". It won't be checked "


In [None]:
print(checkpoint['test_loss'])

In [None]:
 torch.save(lstm_net,'single_mistake_model_lstm_drop_out_1.pth')

In [68]:
checkpoint = torch.load('lstm_single_char_50_mutation_bid_drop_02.pt')

In [74]:
print(checkpoint[])
#  'incorrect_to_correct':confusion_mtx_parameters[0],  'correct_to_correct':confusion_mtx_parameters[2],
# #                 'correct_to_incorrect':confusion_mtx_parameters[1],'regenerate':confusion_mtx_parameters[3]
  
           

[0.008481162786483764, 0.009953457117080688, 0.008462512493133545, 0.008568865060806275, 0.008872705698013305, 0.009778851270675659, 0.009399718046188355, 0.009138405323028564, 0.008280891180038451, 0.00938764214515686, 0.009248465299606323, 0.010755127668380738, 0.009009695053100586, 0.008971762657165528, 0.01076977252960205, 0.01007135510444641, 0.010158282518386842, 0.008264505863189697, 0.008282959461212158, 0.01121429204940796, 0.008798831701278686, 0.009055852890014648, 0.010330718755722047, 0.007766813039779663, 0.009448951482772827, 0.009692203998565675, 0.00912320613861084, 0.009875631332397461, 0.008551460504531861, 0.01096159815788269, 0.009085208177566528, 0.009600162506103516, 0.00832710862159729, 0.010008686780929565, 0.009891873598098755, 0.010521680116653442, 0.010294747352600098, 0.009758317470550537, 0.010009801387786866, 0.008804881572723388, 0.008309823274612427, 0.009526288509368897, 0.008488410711288452, 0.008953988552093506, 0.009343469142913818, 0.00920448899269