In [1]:
import torch 
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import pandas as pd
import numpy as np 
import time 
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import random_split
import math
import string

from global_defs import *

In [2]:
train_data_1=torch.load('train_data_50_mut_1_c.pt')
test_data_1=torch.load('test_data_50_mut_1_c.pt')
train_labels_1=torch.load('train_labels_50_mut_1_c.pt')
test_labels_1=torch.load('test_labels_50_mut_1_c.pt')

In [3]:
num_train_words=len(train_data_1)
num_test_words=len(test_data_1)

In [4]:
num_train_words=48042
num_test_words=5338

In [6]:
class Recurrent_Layer(nn.Module):
    def __init__(self, vocab_size, hidden_size):
        super(Recurrent_Layer, self).__init__()
        self.layer1 = nn.Embedding (vocab_size, hidden_size)
        self.lstm=nn.LSTM( hidden_size, hidden_size, bidirectional=True)
        self.fc=nn.Linear( 2* hidden_size, vocab_size)

    def forward(self, x, h_init,c_init):
        g_seq          =self.layer1(x)
        h_seq, (h_final, c_final) =self.lstm(g_seq, (h_init,c_init))
        score_seq      =self.fc(h_seq)
        
        return score_seq,  h_final , c_final

In [7]:
# create the network
hidden_size= 256
vocab_size = 26
batch_of_words = 1
lstm_net = Recurrent_Layer(vocab_size,hidden_size).cuda()
print(lstm_net)

# send to device
lstm_net = lstm_net.to(device)

# define the loss function
criterion = nn.CrossEntropyLoss()

# define the learning rate
my_lr = 0.05
loss_plt=[]
acc_plt=[]

Recurrent_Layer(
  (layer1): Embedding(26, 256)
  (lstm): LSTM(256, 256, bidirectional=True)
  (fc): Linear(in_features=512, out_features=26, bias=True)
)


## Utlitites 

In [8]:
def get_error( scores , labels ):

    bs=scores.size(0)
    predicted_labels = scores.argmax(dim=1)
    indicator = (predicted_labels == labels)
    num_matches=indicator.sum()
    corr=num_matches/len(predicted_labels)
    return corr.item()

def save_model_parameters():
    torch.save({
                'epoch': epoch,
                'lstm_model':lstm_net.state_dict(),
                'lstm_optimizer': lstm_optimizer.state_dict(),
                'train_loss': loss_plt, 'train_accuracy': accu_plt, 
                'test_loss': test_loss_plt,
                'test_accuracy': accuracy_test_plt, 'confusion matrix_parameters':confusion_mtx_parameters}, 'LSTM+Biderctional.pt')

#                 'incorrect_to_correct':confusion_param[0],  'correct_to_correct':confusion_param[2],
#                 'correct_to_incorrect':confusion_param[1],'regenerate':confusion_param[3]
               
        
def confusion_parameters(scores,target_tensor,inpute_tensor,conf_counter):

    
    if torch.all(torch.eq(target_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making incorrect->correct
        conf_counter[0] +=1

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==0 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->incorrect

        conf_counter[1] +=1    


    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==1: ### making correct->correct

        conf_counter[2] +=1                                                               

    if torch.all(torch.eq(inpute_tensor, scores.argmax(dim=1)))==1 and torch.all(torch.eq(inpute_tensor,target_tensor))==0: ### making inccorrect->correct or regenerate the input 

        conf_counter[3] +=1  
    return conf_counter


## Evaluation

In [9]:
def eval_on_test_set():
    
    # to deactivate dropout regularization during testing
    lstm_net.eval()
    # initilizations 
    running_loss=0; num_batches=0;num_matches=0 ;acc_plt=[];loss_plt=[]
    
    # counts for computing correct and incorrect proportion
    count=torch.zeros(4)
    cnt_list=torch.zeros(4)
    cnf_mtx_count=torch.zeros(4)

    for i in range(0,num_test_words):
        word_length = len(test_data_1 [i])
        
        # load data
        inpute_tensor = test_data_1[i].cuda()
        target_tensor = test_labels_1[i].cuda()
        
        # sending to GPU
        inpute_tensor = inpute_tensor.to(device)
        target_tensor = target_tensor.to(device)
        
        h = torch.zeros(2,batch_of_words,  hidden_size).cuda()
        c = torch.zeros(2,batch_of_words,  hidden_size).cuda()
        h = h.to(device)
        c = c.to(device)
    
        # forward pass
        scores_char, h, c = lstm_net(inpute_tensor.view(word_length,1), h, c)
        
        # reshape before calculating the loss for easier slicing of mini batch of words
        scores_char = scores_char.view(word_length*batch_of_words,vocab_size)
        target_tensor = target_tensor.contiguous()
        target_tensor = target_tensor.view(word_length*batch_of_words)
        
        # calculating the loss of batch of character sthat constrcut a words
        loss_char = criterion(scores_char, target_tensor)# do we need to add the loss for every 
        
        # accumalate the loss
        running_loss += loss_char.item()
        num_batches +=1 
        
        # computing accuracy
        num_matches += get_error(scores_char, target_tensor)
   
   #=======================================================#
        # compute confusion paramters for every word
        cnf_mtx_count=confusion_parameters(scores_char,target_tensor,inpute_tensor,count)
        #============================================# 
        

    ###====Different Metrics for evaluation=============###
    
    # counter for computing confusion matrix
#     cnt_list[0].append(cnf_mtx_count[0].item())
#     cnt_list[1].append(cnf_mtx_count[1].item())    
#     cnt_list[2].append(cnf_mtx_count[2].item())
#     cnt_list[3].append(cnf_mtx_count[3].item())
    
    # accuracy and loss 
    accuracy= (num_matches/num_test_words)*100
    acc_plt.append(accuracy)
    total_loss = running_loss/num_batches 
    loss_plt.append(total_loss)
    
    # printing results 
    print('Test==: loss = ',(total_loss),'\t accuracy=', accuracy,'%' )
    return acc_plt, loss_plt, cnf_mtx_count

In [133]:
eval_on_test_set()

Test==: loss =  0.1131328574592399 	 accuracy= 83.94529786436867 %


([83.94529786436867],
 [0.1131328574592399],
 [[2235.0], [18.0], [2246.0], [387.0], []])

In [10]:
start=time.time()


accu_plt=[]
loss_plt=[]
test_loss_plt=[]
accuracy_test_plt=[]
confusion_mtx_parameters=[]
# main loop
for epoch in range(1,1000):
    
    # to activate dropout during training 
    lstm_net.train()
    
    # divide the learning rate by 3 except after the first epoch
    if epoch % 10==0:
        my_lr = my_lr / 1.1
#         torch.save(lstm_net, 'single_mistake_model_lstm_40_correct_1.pth')
    
    # create a new optimizer at the beginning of each epoch: give the current learning rate.   
    lstm_optimizer=torch.optim.SGD( lstm_net.parameters() , lr=my_lr )
    
   
        
    # set the running quatities to zero at the beginning of the epoch
    running_loss = 0
    num_batches = 0    
    num_matches = 0
    
    # loop across batch of words
    for i in range(0,num_train_words):
             # set the initial h and c to be the zero vector
        h = torch.zeros(2,batch_of_words,  hidden_size).cuda()
        c = torch.zeros(2,batch_of_words,  hidden_size).cuda()
        h = h.to(device)
        c = c.to(device)
        
        word_length = len(train_data_1 [i])
        
        # Set the gradients to zeros
        lstm_optimizer.zero_grad()
        
        # load the data
        minibatch_words  = train_data_1[i ]
        minibatch_labels = train_labels_1[ i]
        
        # send to GPU
        minibatch_words  = minibatch_words.to(device)
        minibatch_labels = minibatch_labels.to(device)

        # Detach to prevent from backpropagating all the way to the beginning
        # Then tell Pytorch to start tracking all operations that will be done on h and c
        h = h.detach()
        c = c.detach()
        h = h.requires_grad_()
        c = c.requires_grad_()

        # forward pass
        scores_char, h, c = lstm_net(minibatch_words.view(word_length,1), h, c)

        # reshape before calculating the loss for easier slicing of mini batch of words
        scores_char = scores_char.view(  word_length*batch_of_words , vocab_size)
        minibatch_labels = minibatch_labels.contiguous()
        minibatch_labels = minibatch_labels.view(word_length*batch_of_words )
        
        # calculating the loss of batch of character sthat constrcut a words
        loss_char = criterion(scores_char, minibatch_labels)

        # summation of both lossses to 
        combined_loss = loss_char

        # backward pass to compute dL/dR, dL/dV and dL/dW
        combined_loss.backward()

        # update the wights 
        lstm_optimizer.step()

        # update the running loss  
        running_loss += combined_loss.detach().item()
        num_batches += 1
        num_matches += get_error(scores_char, minibatch_labels)
        
        # end of iteration 
        
    # compute for full training set
    total_loss = running_loss/num_batches
    loss_plt.append(total_loss)
    accuracy = (num_matches/num_train_words)*100
    acc_plt.append(accuracy)
    
    # Compute the time 
    elapsed = time.time()-start
    
    print('')
    print('Train:::', 'epoch=',epoch,'\t lr=', my_lr, '\t (loss)=',(total_loss),'\t (accuracy)=' , (accuracy),'%','\t time=', elapsed)
    
    # evaluate on test set to monitor the loss 
    acc_tst_plt, loss_tst_plt, confusion_param=eval_on_test_set()
    
    # saving test parameters  
    test_loss_plt.append(loss_tst_plt)
    accuracy_test_plt.append(acc_tst_plt)
    confusion_mtx_parameters.append(confusion_param)
    #### Saving model parameters every epoch  
    save_model_parameters()


Train::: epoch= 1 	 lr= 0.05 	 (loss)= 0.1683590938748826 	 (accuracy)= 68.53378293992756 % 	 time= 242.80848741531372
Test==: loss =  1.5605131608783551 	 accuracy= 2.0232296740352194 %

Train::: epoch= 2 	 lr= 0.05 	 (loss)= 0.1037016541074899 	 (accuracy)= 79.80933349985429 % 	 time= 507.6129548549652
Test==: loss =  1.4938455778578927 	 accuracy= 2.9973772948669914 %

Train::: epoch= 3 	 lr= 0.05 	 (loss)= 0.08805007854499278 	 (accuracy)= 82.64227134590567 % 	 time= 771.8383738994598
Test==: loss =  1.474786675305714 	 accuracy= 2.941176470588235 %

Train::: epoch= 4 	 lr= 0.05 	 (loss)= 0.07622907352850429 	 (accuracy)= 84.53020273926981 % 	 time= 1037.368916273117
Test==: loss =  1.3896928452788229 	 accuracy= 4.571000374672162 %

Train::: epoch= 5 	 lr= 0.05 	 (loss)= 0.06900725877822109 	 (accuracy)= 85.77702843345406 % 	 time= 1301.1735048294067
Test==: loss =  1.2630484622501672 	 accuracy= 5.488947171225178 %

Train::: epoch= 6 	 lr= 0.05 	 (loss)= 0.061162211288743724 	 (


Train::: epoch= 42 	 lr= 0.03415067276825353 	 (loss)= 0.004241817885135002 	 (accuracy)= 99.60867574205903 % 	 time= 11080.738979816437
Test==: loss =  0.41917407474071233 	 accuracy= 52.285500187336076 %

Train::: epoch= 43 	 lr= 0.03415067276825353 	 (loss)= 0.0041051044837509916 	 (accuracy)= 99.62116481412097 % 	 time= 11340.830182313919
Test==: loss =  0.40457955193662504 	 accuracy= 53.259647808167855 %

Train::: epoch= 44 	 lr= 0.03415067276825353 	 (loss)= 0.004113597120300827 	 (accuracy)= 99.60867574205903 % 	 time= 11601.667145967484
Test==: loss =  0.38084777773378076 	 accuracy= 55.226676657924315 %

Train::: epoch= 45 	 lr= 0.03415067276825353 	 (loss)= 0.004185127791977157 	 (accuracy)= 99.59826818200742 % 	 time= 11865.776890993118
Test==: loss =  0.3734520086096365 	 accuracy= 55.86361933308355 %

Train::: epoch= 46 	 lr= 0.03415067276825353 	 (loss)= 0.00414937082921051 	 (accuracy)= 99.60034969401774 % 	 time= 12128.36697268486
Test==: loss =  0.36064012315739113 	


Train::: epoch= 82 	 lr= 0.02332536901048666 	 (loss)= 0.0028763645252432558 	 (accuracy)= 99.58369759793514 % 	 time= 21605.56140947342
Test==: loss =  0.1768150907093711 	 accuracy= 76.82652678905957 %

Train::: epoch= 83 	 lr= 0.02332536901048666 	 (loss)= 0.0029180383626455 	 (accuracy)= 99.56496398984223 % 	 time= 21868.939032554626
Test==: loss =  0.18047267203602926 	 accuracy= 76.17085050580742 %

Train::: epoch= 84 	 lr= 0.02332536901048666 	 (loss)= 0.003169403350389399 	 (accuracy)= 99.53374130968736 % 	 time= 22129.991582155228
Test==: loss =  0.17986105850077294 	 accuracy= 76.6579243162233 %

Train::: epoch= 85 	 lr= 0.02332536901048666 	 (loss)= 0.0029381332696562573 	 (accuracy)= 99.56496398984223 % 	 time= 22395.70442533493
Test==: loss =  0.1750762851679036 	 accuracy= 76.8639940052454 %

Train::: epoch= 86 	 lr= 0.02332536901048666 	 (loss)= 0.0028798215005314265 	 (accuracy)= 99.57745306190417 % 	 time= 22658.59889125824
Test==: loss =  0.17367555268382406 	 accura

Test==: loss =  0.13337158551490547 	 accuracy= 81.99700262270512 %

Train::: epoch= 122 	 lr= 0.01593154088551783 	 (loss)= 0.0025135403738859256 	 (accuracy)= 99.53165979767704 % 	 time= 32151.854460716248
Test==: loss =  0.1349987032384996 	 accuracy= 82.0719370550768 %

Train::: epoch= 123 	 lr= 0.01593154088551783 	 (loss)= 0.002556492105339859 	 (accuracy)= 99.51500770159444 % 	 time= 32414.81303715706
Test==: loss =  0.13483545030543717 	 accuracy= 81.92206819033346 %

Train::: epoch= 124 	 lr= 0.01593154088551783 	 (loss)= 0.0025111615925314805 	 (accuracy)= 99.537904333708 % 	 time= 32680.639077425003
Test==: loss =  0.13338000805797512 	 accuracy= 82.24053952791309 %

Train::: epoch= 125 	 lr= 0.01593154088551783 	 (loss)= 0.0025060911534077837 	 (accuracy)= 99.53165979767704 % 	 time= 32946.93550348282
Test==: loss =  0.13348804667158384 	 accuracy= 82.37167478456351 %

Train::: epoch= 126 	 lr= 0.01593154088551783 	 (loss)= 0.0024604788801000804 	 (accuracy)= 99.53790433370

Test==: loss =  0.1218725603567849 	 accuracy= 83.7579617834395 %

Train::: epoch= 162 	 lr= 0.010881456789507428 	 (loss)= 0.0017510980032019573 	 (accuracy)= 99.55663794180093 % 	 time= 42638.79274725914
Test==: loss =  0.12178757325783338 	 accuracy= 83.77669539153241 %

Train::: epoch= 163 	 lr= 0.010881456789507428 	 (loss)= 0.0017447787189965476 	 (accuracy)= 99.55663794180093 % 	 time= 42900.822179317474
Test==: loss =  0.1217101141236621 	 accuracy= 83.77669539153241 %

Train::: epoch= 164 	 lr= 0.010881456789507428 	 (loss)= 0.0017395514890206907 	 (accuracy)= 99.55663794180093 % 	 time= 43163.90292620659
Test==: loss =  0.12163943295142332 	 accuracy= 83.77669539153241 %

Train::: epoch= 165 	 lr= 0.010881456789507428 	 (loss)= 0.001734843363735887 	 (accuracy)= 99.55663794180093 % 	 time= 43428.38034033775
Test==: loss =  0.1215746999771375 	 accuracy= 83.81416260771825 %

Train::: epoch= 166 	 lr= 0.010881456789507428 	 (loss)= 0.001730443725334242 	 (accuracy)= 99.55663794

KeyboardInterrupt: 

In [117]:
torch.save(lstm_net, 'this_morning_wo_do.pth')

In [134]:
checkpoint = torch.load('single_mistake_model_lstm_40_correct_2.pt')

FileNotFoundError: [Errno 2] No such file or directory: 'single_mistake_model_lstm_40_correct_2.pt'

In [126]:
print(checkpoint['correct_to_correct'])

[5293.0]
