In [2]:
from google.colab import drive
drive.mount('/content/drive/')


import os
# Use the absolute path under the mount point so this cell can be re-run:
# a relative chdir only works the first time, while the working directory
# is still the one the relative path was written against.
os.chdir("/content/drive/My Drive/Advanced NLP/Exam")

import numpy as np
import torch
import torch.nn as nn
import random
import torch.nn.functional as F
from torch import optim
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import matplotlib.ticker as ticker
from tqdm import tqdm
import numpy as np
%matplotlib inline 
from torch.nn.utils import clip_grad_value_


# Target device for tensors and models: CUDA when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

Mounted at /content/drive/


In [5]:
# Reserved vocabulary indices: SOS_token is fed to the decoder as its first
# input, EOS_token is appended to every encoded sentence and stops decoding.
SOS_token = 0
EOS_token = 1

class Format:
    """Vocabulary for one side of the data (word <-> integer index).

    Indices 0 and 1 are reserved for the SOS and EOS markers, so the first
    real word receives index 2.
    """

    def __init__(self, name):
        self.name = name
        self.word2index = {}
        self.index2word = {SOS_token: "SOS", EOS_token: "EOS"}
        # Starts at 2 because SOS and EOS already occupy two slots.
        self.n_words = 2

    def addSentence(self, sentence):
        """Register every space-separated token of `sentence`."""
        tokens = sentence.split(' ')
        for token in tokens:
            self.addWord(token)

    def addWord(self, word):
        """Assign the next free index to `word` unless it is already known."""
        if word in self.word2index:
            return
        new_index = self.n_words
        self.word2index[word] = new_index
        self.index2word[new_index] = word
        self.n_words = new_index + 1
            
def readFile(filename):
    """Load a data file and split it into [input, output] string pairs.

    Args:
        filename: path relative to ``../SCAN-master/`` without the ``.txt``
            suffix.

    Returns:
        (input_lang, output_lang, pairs): two empty ``Format`` vocabularies
        named "input" and "output", and one list per line holding the text
        before and after the ``' OUT: '`` separator.
    """
    print("Reading lines...")

    # Read the file and split into lines; the context manager closes the
    # handle even if reading fails.
    path = '../SCAN-master/%s.txt' % filename
    with open(path, encoding='utf-8') as handle:
        lines = handle.read().strip().split('\n')

    # Drop the first four characters of each line (presumably the "IN: "
    # prefix -- verify against the data files) and split on the separator.
    pairs = [s[4:].split(' OUT: ') for s in lines]

    input_lang = Format("input")
    output_lang = Format("output")

    return input_lang, output_lang, pairs

def prepareData(filename):
    """Read `filename` via readFile and build both vocabularies from its pairs.

    Returns:
        (input_lang, output_lang, pairs) with the vocabularies populated from
        the first and second element of every pair respectively.
    """
    input_lang, output_lang, pairs = readFile(filename)
    print("Read %s sentence pairs" % len(pairs))

    print("Counting words...")
    for entry in pairs:
        input_lang.addSentence(entry[0])
        output_lang.addSentence(entry[1])

    print("Counted words:")
    for vocab in (input_lang, output_lang):
        print(vocab.name, vocab.n_words)

    return input_lang, output_lang, pairs


def indexesFromSentence(lang, sentence):
    """Map each space-separated token of `sentence` to its index in `lang`.

    Raises:
        KeyError: if a token is missing from ``lang.word2index``.
    """
    lookup = lang.word2index
    token_ids = []
    for token in sentence.split(' '):
        token_ids.append(lookup[token])
    return token_ids


def tensorFromSentence(lang, sentence):
    """Encode `sentence` as a column tensor of word indices ending in EOS.

    Returns:
        A ``torch.long`` tensor on `device`, viewed as (n_tokens + 1, 1).
    """
    token_ids = indexesFromSentence(lang, sentence) + [EOS_token]
    column = torch.tensor(token_ids, dtype=torch.long, device=device)
    return column.view(-1, 1)

def tensorsFromPair(pair):
    """Encode a pair as (input_tensor, target_tensor).

    Uses the module-level `input_lang` and `output_lang` vocabularies, so
    those must be defined before this is called.
    """
    source_text = pair[0]
    source = tensorFromSentence(input_lang, source_text)
    target_text = pair[1]
    target = tensorFromSentence(output_lang, target_text)
    return (source, target)
 
class EncoderLSTM(nn.Module):
    """Token-at-a-time LSTM encoder.

    Args:
        input_size: vocabulary size for the embedding table.
        hidden_size: embedding width and LSTM hidden width.
        dropout_prob: dropout applied to the embedding and, for stacked
            LSTMs, between layers.
        number_of_layers: number of stacked LSTM layers.
    """

    def __init__(self, input_size, hidden_size, dropout_prob=0.1, number_of_layers=1):
        super(EncoderLSTM, self).__init__()
        self.number_of_layers = number_of_layers
        self.hidden_size = hidden_size

        self.embedding = nn.Embedding(input_size, hidden_size)
        # nn.LSTM only applies dropout between layers; passing a non-zero
        # value with a single layer has no effect and just emits a warning.
        inter_layer_dropout = dropout_prob if number_of_layers > 1 else 0.0
        self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers=number_of_layers,
                            dropout=inter_layer_dropout)
        self.dropout = nn.Dropout(dropout_prob)

    def forward(self, input, hidden):
        """Encode one token.

        Args:
            input: index tensor for a single token.
            hidden: (h, c) LSTM state tuple.

        Returns:
            (output, hidden): LSTM output viewed from a (1, 1, hidden_size)
            input, and the updated (h, c) state.
        """
        embedded = (self.dropout(self.embedding(input))).view(1, 1, -1)
        output, hidden = self.lstm(embedded, hidden)
        return output, hidden

    def initHidden(self):
        """Return a zero (h, c) state of shape (number_of_layers, 1, hidden_size).

        The state is created on the same device as the module's parameters,
        and sized from `number_of_layers` rather than a hard-coded 1 or 2.
        """
        shape = (self.number_of_layers, 1, self.hidden_size)
        target_device = self.embedding.weight.device
        return (torch.zeros(*shape, device=target_device),
                torch.zeros(*shape, device=target_device))

class AttnDecoderLSTM(nn.Module):
    """One-step LSTM decoder with additive (tanh-scored) attention.

    Args:
        hidden_size: embedding width, LSTM width and encoder-state width.
        output_size: size of the output vocabulary.
        dropout_p: dropout applied to the embedded decoder input.
        number_of_layers: number of stacked LSTM layers.
    """

    def __init__(self, hidden_size, output_size, dropout_p=0.1, number_of_layers=1):
        super(AttnDecoderLSTM, self).__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.dropout_p = dropout_p
        self.number_of_layers = number_of_layers

        self.embedding = nn.Embedding(self.output_size, self.hidden_size)
        self.tanh = nn.Tanh()

        # Attention score: va . tanh(Ua(encoder_state) + Wa(decoder_state))
        self.Ua = nn.Linear(self.hidden_size, self.hidden_size)
        self.Wa = nn.Linear(self.hidden_size, self.hidden_size)

        # Zero initialisation makes every score 0, i.e. attention starts out
        # uniform over the encoder states.
        self.va = nn.Parameter(torch.zeros(1, hidden_size))

        self.attn_combine = nn.Linear(self.hidden_size * 2, self.hidden_size)

        self.dropout = nn.Dropout(self.dropout_p)
        self.lstm = nn.LSTM(self.hidden_size, self.hidden_size, num_layers=number_of_layers)
        self.out = nn.Linear(self.hidden_size * 2, self.output_size)

    def forward(self, input, hidden, encoder_hiddens):
        """Run a single decoding step.

        Args:
            input: index tensor holding one token.
            hidden: (h, c) LSTM state tuple.
            encoder_hiddens: (input_length, hidden_size) encoder states.

        Returns:
            (output, hidden, attn_weights): log-probabilities of shape
            (1, output_size), the updated (h, c) state, and attention weights
            of shape (1, input_length, 1) that sum to 1 over input_length.
        """
        embedded = (self.dropout(self.embedding(input))).view(1, 1, -1)

        encoder_hiddens = encoder_hiddens.unsqueeze(1)  # (L, 1, H)

        # Query with the top layer's hidden state, shape (1, H). For a
        # single layer this is identical to using hidden[0]; for stacked
        # layers it avoids broadcasting the layer axis against the length axis.
        query = hidden[0][-1]
        scores = torch.inner(
            self.va, self.tanh(self.Ua(encoder_hiddens) + self.Wa(query)))
        attn_weights = F.softmax(scores, dim=1)

        # squeeze(1) removes only the axis added above, so a length-1 input
        # or hidden_size of 1 cannot collapse the wrong dimension.
        context = torch.sum(
            torch.mul(attn_weights, encoder_hiddens.squeeze(1)), dim=1)  # (1, H)

        output = torch.cat((embedded[0], context), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.lstm(output, hidden)

        cat_output = torch.cat((context, output[0]), 1)
        output = F.log_softmax(self.out(cat_output), dim=1)

        return output, hidden, attn_weights

    def initHidden(self):
        """Return a zero (h, c) state usable by `forward`.

        Each tensor has shape (number_of_layers, 1, hidden_size) and lives on
        the same device as the module's parameters. A single tensor cannot be
        used here because `forward` indexes `hidden[0]` and passes `hidden`
        to an LSTM, which needs an (h, c) tuple.
        """
        shape = (self.number_of_layers, 1, self.hidden_size)
        target_device = self.va.device
        return (torch.zeros(*shape, device=target_device),
                torch.zeros(*shape, device=target_device))
    
   
import time
import math

def asMinutes(s):
    """Format a duration in seconds as '<minutes>m <seconds>s'."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

def timeSince(since, percent):
    """Report elapsed time and the estimated time remaining.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: fraction of the work completed so far (must be non-zero).

    Returns:
        '<elapsed> (- <remaining>)', both formatted by asMinutes.
    """
    elapsed = time.time() - since
    # Linear extrapolation: total = elapsed / fraction done.
    projected_total = elapsed / (percent)
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

def showPlot(points, accuracy):
    """Plot the loss and accuracy curves on one set of axes.

    Args:
        points: sequence of averaged loss values.
        accuracy: sequence of averaged accuracy values.
    """
    # plt.subplots() already creates the figure; an additional plt.figure()
    # call would leave a second, empty figure behind on every invocation.
    fig, ax = plt.subplots()
    loc = ticker.MultipleLocator(base=0.2)
    ax.yaxis.set_major_locator(loc)
    ax.plot(points, label="loss")
    ax.plot(accuracy, label="accuracy")
    ax.legend()
    plt.show()

def train2(input_tensor, target_tensor, encoder, decoder, 
          encoder_optimizer, decoder_optimizer, criterion, seed,
          model):
    """Run one optimisation step on a single (input, target) pair.

    Args:
        input_tensor: (input_length, 1) tensor of input token indices.
        target_tensor: (target_length, 1) tensor of target token indices.
        encoder, decoder: modules to train; the decoder is called as
            ``decoder(input, hidden, encoder_states)``.
        encoder_optimizer, decoder_optimizer: optimisers for the two modules.
        criterion: loss applied to each decoder output / target token.
        seed: value passed to ``torch.manual_seed`` before the step; ``None``
            skips seeding.
        model: kept for backward compatibility. The attention memory is now
            taken from the encoder output, which works for both "lstm" and
            "gru" style encoders, so this value is no longer consulted.

    Returns:
        (mean loss per decoded token, number of correctly predicted tokens,
        number of decoded tokens).
    """
    teacher_forcing_ratio = 0.5
    # NOTE(review): reseeding on every step replays the same torch random
    # stream each time, so dropout masks repeat across steps. Kept as-is
    # because callers pass `seed` per step; consider seeding once in the
    # training loop instead.
    if seed is not None:
        torch.manual_seed(seed)

    encoder_hidden = encoder.initHidden()
    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_hiddens = torch.zeros(input_length, encoder.hidden_size, device=device)

    loss = 0
    gold_pred = 0

    for ei in range(input_length):
        encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
        # The encoder output is the top layer's hidden state for this step.
        # This matches what evaluate_attention feeds the decoder, and avoids
        # leaving zeros (or crashing) when `model` does not match the encoder.
        encoder_hiddens[ei] = encoder_output[0, 0]

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden

    use_teacher_forcing = random.random() < teacher_forcing_ratio

    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_attention = decoder(
            decoder_input, decoder_hidden, encoder_hiddens)
        topv, topi = decoder_output.topk(1)
        pred = topi.squeeze()

        if torch.equal(pred, target_tensor[di].squeeze()):
            gold_pred += 1

        loss += criterion(decoder_output, target_tensor[di])

        if use_teacher_forcing:
            # Feed the gold token as the next input.
            decoder_input = target_tensor[di]
        else:
            # Feed the model's own prediction and stop once it emits EOS.
            decoder_input = pred.detach()
            if decoder_input.item() == EOS_token:
                target_length = di + 1
                break

    loss.backward()

    torch.nn.utils.clip_grad_norm_(encoder.parameters(), max_norm=5.0)
    torch.nn.utils.clip_grad_norm_(decoder.parameters(), max_norm=5.0)
    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length, gold_pred, target_length

def trainIters2(encoder, decoder, n_iters, data, seed, print_every=1000, plot_every=100,
               learning_rate=0.001, model="gru"):
    """Train on `n_iters` randomly drawn pairs, then plot loss and accuracy.

    Args:
        encoder, decoder: modules to optimise (each with its own Adam).
        n_iters: number of single-pair training steps.
        data: sequence of pairs to sample from with ``random.choice``.
        seed: forwarded unchanged to train2 on every step.
        print_every: steps between console progress reports.
        plot_every: steps between points on the final plot.
        learning_rate: Adam learning rate for both optimisers.
        model: forwarded unchanged to train2.

    Returns:
        (encoder, decoder) after training.
    """
    began = time.time()

    loss_curve = []
    acc_curve = []
    # Two independent running windows: one for console reports, one for the plot.
    report = {"loss": 0, "correct": 0, "tokens": 0}
    chart = {"loss": 0, "correct": 0, "tokens": 0}

    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    training_pairs = [tensorsFromPair(random.choice(data))
                      for _ in range(n_iters)]
    criterion = nn.NLLLoss()

    for step, training_pair in enumerate(training_pairs, start=1):
        source_tensor = training_pair[0]
        gold_tensor = training_pair[1]

        step_loss, step_correct, step_tokens = train2(
            source_tensor, gold_tensor, encoder, decoder,
            encoder_optimizer, decoder_optimizer, criterion, seed, model=model)

        for window in (report, chart):
            window["tokens"] += step_tokens
            window["correct"] += step_correct
            window["loss"] += step_loss

        if step % print_every == 0:
            report_acc = report["correct"] / report["tokens"]
            report_loss = report["loss"] / print_every
            report = {"loss": 0, "correct": 0, "tokens": 0}
            print('%s (%d %d%%) loss: %.4f acc: %.4f' % (timeSince(began, step / n_iters),
                                step, step / n_iters * 100, report_loss, report_acc))

        if step % plot_every == 0:
            chart_acc = chart["correct"] / chart["tokens"]
            chart_loss = chart["loss"] / plot_every
            loss_curve.append(chart_loss)
            acc_curve.append(chart_acc)
            chart = {"loss": 0, "correct": 0, "tokens": 0}

    showPlot(loss_curve, acc_curve)
    return encoder, decoder

def evaluate(encoder, decoder, sentence, max_length=200):
    """Greedily decode `sentence` with a decoder that takes no attention memory.

    The decoder is called as ``decoder(input, hidden)`` and must return
    ``(output, hidden)``.

    Args:
        encoder, decoder: trained modules.
        sentence: space-separated input string (encoded with `input_lang`).
        max_length: upper bound on decoding steps, so a model that never
            emits EOS cannot loop forever.

    Returns:
        List of decoded words followed by a trailing '' end marker (callers
        strip it with ``[0:-1]``). The marker is appended on EOS and also
        when `max_length` is reached.
    """
    # Evaluate with dropout disabled, then restore whatever mode the modules
    # were in so a surrounding training loop is unaffected.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang, sentence)
            input_length = input_tensor.size(0)
            encoder_hidden = encoder.initHidden()

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []

            for _ in range(max_length):
                decoder_output, decoder_hidden = decoder(
                    decoder_input, decoder_hidden)
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('')
                    break
                decoded_words.append(output_lang.index2word[topi.item()])

                decoder_input = topi.squeeze().detach()
            else:
                # Step limit hit without EOS: still add the end marker so
                # callers that drop the last element do not lose a real word.
                decoded_words.append('')

            return decoded_words
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)


def accuracy(encoder, decoder, pairs):
    """Exact-match accuracy of `evaluate` over `pairs`.

    Prints the completed fraction roughly every 1% of the data.

    Args:
        encoder, decoder: trained modules passed through to `evaluate`.
        pairs: sequence of [input_sentence, expected_output] items.

    Returns:
        Fraction of pairs whose decoded sentence equals the expected output;
        0.0 when `pairs` is empty.
    """
    total = len(pairs)
    if total == 0:
        return 0.0

    # At least 1, so data sets with fewer than 100 pairs do not divide by zero.
    report_every = max(1, total // 100)
    correct = 0
    for i, pair in enumerate(pairs, start=1):
        if i % report_every == 0:
            print("%.2f" % (i / total))
        # Drop the trailing end marker appended by evaluate.
        output_words = evaluate(encoder, decoder, pair[0])[0:-1]
        output_sentence = ' '.join(output_words)
        if output_sentence == pair[1]:
            correct += 1
    return correct / total



def evaluateRandomly(encoder, decoder, n=10, pairs=None):
    """Print input, reference and prediction for `n` randomly chosen pairs.

    Args:
        encoder, decoder: trained modules passed through to `evaluate`.
        n: number of samples to print.
        pairs: pairs to sample from. When omitted, falls back to the
            module-level `pairs_test`, which must then exist.
    """
    if pairs is None:
        pairs = pairs_test
    for i in range(n):
        pair = random.choice(pairs)
        print('Input', pair[0])
        print('Real:', pair[1])
        # Drop the trailing end marker appended by evaluate.
        output_words = evaluate(encoder, decoder, pair[0])[0:-1]
        output_sentence = ' '.join(output_words)
        print('Pred:', output_sentence)
        print('')
        
def evaluate_attention(encoder, decoder, sentence, max_length=200):
    """Greedily decode `sentence` with an attention decoder.

    Args:
        encoder, decoder: trained modules; the decoder is called as
            ``decoder(input, hidden, encoder_outputs)`` and returns
            ``(output, hidden, attention)``.
        sentence: space-separated input string (encoded with `input_lang`).
        max_length: upper bound on decoding steps, so a model that never
            emits EOS cannot loop forever.

    Returns:
        (decoded_words, decoder_attentions): the decoded words followed by a
        trailing '' end marker, and a CPU tensor with one attention row per
        decoding step, shape (n_steps, input_length).
    """
    # Evaluate with dropout disabled, then restore the previous mode.
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()
    try:
        with torch.no_grad():
            input_tensor = tensorFromSentence(input_lang, sentence)
            input_length = input_tensor.size()[0]
            encoder_hidden = encoder.initHidden()

            encoder_outputs = torch.zeros(input_length, encoder.hidden_size, device=device)

            for ei in range(input_length):
                encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                         encoder_hidden)
                encoder_outputs[ei] += encoder_output[0, 0]

            decoder_input = torch.tensor([[SOS_token]], device=device)  # SOS

            decoder_hidden = encoder_hidden

            decoded_words = []
            # One row per decoding step. A fixed (input_length, input_length)
            # buffer would drop every step past input_length, and outputs can
            # be longer than inputs.
            attention_rows = []

            for _ in range(max_length):
                decoder_output, decoder_hidden, decoder_attention = decoder(
                    decoder_input, decoder_hidden, encoder_outputs)
                attention_rows.append(decoder_attention.detach().reshape(-1).cpu())
                topv, topi = decoder_output.data.topk(1)
                if topi.item() == EOS_token:
                    decoded_words.append('')
                    break
                decoded_words.append(output_lang.index2word[topi.item()])

                decoder_input = topi.squeeze().detach()
            else:
                # Step limit hit without EOS: still add the end marker so
                # callers that drop the last element do not lose a real word.
                decoded_words.append('')

            if attention_rows:
                decoder_attentions = torch.stack(attention_rows)
            else:
                decoder_attentions = torch.zeros(0, input_length)

            return decoded_words, decoder_attentions
    finally:
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)

def accuracy_attention(encoder, decoder, pairs):
    """Exact-match accuracy of `evaluate_attention` over `pairs`.

    Prints the completed fraction roughly every 1% of the data.

    Args:
        encoder, decoder: trained modules passed to `evaluate_attention`.
        pairs: sequence of [input_sentence, expected_output] items.

    Returns:
        Fraction of pairs whose decoded sentence equals the expected output;
        0.0 when `pairs` is empty.
    """
    total = len(pairs)
    if total == 0:
        return 0.0

    # At least 1, so data sets with fewer than 100 pairs do not divide by zero.
    report_every = max(1, total // 100)
    correct = 0
    for i, pair in enumerate(pairs, start=1):
        if i % report_every == 0:
            print("%.2f" % (i / total))
        output_words, attention = evaluate_attention(encoder, decoder, pair[0])
        # Drop the trailing end marker before comparing.
        output_sentence = ' '.join(output_words[0:-1])
        if output_sentence == pair[1]:
            correct += 1
    return correct / total

def evaluateRandomly_attention(encoder, decoder, pairs, n=10):
    """Print input, reference and attention-decoder prediction for `n` random pairs."""
    for _ in range(n):
        sample = random.choice(pairs)
        print('Input', sample[0])
        print('Real:', sample[1])
        decoded, _attention = evaluate_attention(encoder, decoder, sample[0])
        # The last decoded element is the end marker, not a word.
        prediction = ' '.join(decoded[0:-1])
        print('Pred:', prediction)
        print('')


#taken from pytorch tutorial
def showAttention(input_sentence, output_words, attentions):
    """Draw the attention matrix as a heat map with word labels.

    Args:
        input_sentence: space-separated input string; labels the columns,
            followed by '<EOS>'.
        output_words: decoded words; label the rows.
        attentions: 2-D CPU tensor of attention weights (rows are decoder
            steps, columns are input positions).
    """
    # Set up figure with colorbar
    fig = plt.figure()
    ax = fig.add_subplot(111)
    cax = ax.matshow(attentions.numpy(), cmap='bone')
    fig.colorbar(cax)

    # Pin one tick per matrix cell before assigning labels, so each label is
    # bound to an explicit position instead of whatever ticks the locator
    # happens to produce. Labels are clipped to the matrix shape so the axes
    # are never stretched beyond the image.
    n_rows, n_cols = attentions.shape[0], attentions.shape[1]
    x_labels = (input_sentence.split(' ') + ['<EOS>'])[:n_cols]
    y_labels = list(output_words)[:n_rows]

    ax.set_xticks(range(len(x_labels)))
    ax.set_xticklabels(x_labels, rotation=90)
    ax.set_yticks(range(len(y_labels)))
    ax.set_yticklabels(y_labels)

    plt.show()


def evaluateAndShowAttention(input_sentence, encoder, decoder):
    """Decode `input_sentence`, print input and output, and plot the attention."""
    decoded, attention_map = evaluate_attention(encoder, decoder, input_sentence)
    report_lines = (
        ('input =', input_sentence),
        ('output =', ' '.join(decoded)),
    )
    for label, text in report_lines:
        print(label, text)
    showAttention(input_sentence, decoded, attention_map)


In [None]:
# Results table: one entry per (amount, repetition) run. All four columns are
# appended together once a run has finished, so the lists stay equal in length.
prim_extra_dict = {"Run":[], "Accuracy":[], "Seed":[], "N_Comp":[]}

amounts = [1,2,4,8,16,32]
rep = [1,2,3,4,5]
TRAIN_SEED = 42  # seed forwarded to trainIters2 for every run

for amount in amounts:
    for reps in rep:
        print("Comp amount is: ", amount)

        # For every comp amount
        # Run a model 5 times and plot results
        # input_lang / output_lang are module-level on purpose: tensorsFromPair
        # and evaluate_attention read them as globals.
        input_lang, output_lang, pairs_comp = prepareData(f"add_prim_split/with_additional_examples/tasks_train_addprim_complex_jump_num{amount}_rep{reps}")
        input_lang_test, output_lang_test, pairs_test_comp = prepareData(f"add_prim_split/with_additional_examples/tasks_test_addprim_complex_jump_num{amount}_rep{reps}")

        hidden_size = 100
        input_size = input_lang.n_words
        output_size = output_lang.n_words

        # Using LSTM
        encoder1lstm = EncoderLSTM(input_lang.n_words, hidden_size, dropout_prob=0.1, number_of_layers=1).to(device)
        decoder1lstm = AttnDecoderLSTM(hidden_size, output_size, dropout_p=0.1, number_of_layers=1).to(device)

        encoder1lstm, decoder1lstm = trainIters2(encoder1lstm, decoder1lstm, n_iters=10000, print_every=2000, data=pairs_comp, seed=TRAIN_SEED, model="lstm")
        print("Accessing accuracies")

        torch.save(encoder1lstm.state_dict(), f"encoder{amount}{reps}.pt")
        torch.save(decoder1lstm.state_dict(), f"decoder{amount}{reps}.pt")

        accuracy_iter = accuracy_attention(encoder1lstm, decoder1lstm, pairs_test_comp)

        # "Run" is filled with the repetition index -- presumably what the
        # column was meant to hold; adjust if it should be something else.
        prim_extra_dict["N_Comp"].append(amount)
        prim_extra_dict["Run"].append(reps)
        prim_extra_dict["Seed"].append(TRAIN_SEED)
        prim_extra_dict["Accuracy"].append(accuracy_iter)

