In [1]:
from __future__ import unicode_literals, print_function, division
from io import open
import unicodedata
import string
import re
import random
import time
import math
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
plt.switch_backend('agg')
import matplotlib.ticker as ticker
import numpy as np
from os import system
from nltk.translate.bleu_score import SmoothingFunction, sentence_bleu

In [2]:

# Use the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

#----------Hyper Parameters----------#
# Width of the embedding vectors and of every LSTM layer.
hidden_size = 512
# Vocabulary size: sizes the embedding tables and the decoder output layer.
vocab_size = 28
# Probability of feeding the ground-truth token (instead of the model's own
# prediction) to the decoder while training; 1.0 means always.
teacher_forcing_ratio = 1.0
# NOTE(review): LR is not referenced by any visible cell; the training call
# relies on trainIters' own learning_rate default instead.
LR = 0.05

In [3]:
################################
# Example inputs of compute_bleu
################################

# The target word
reference = 'variable'
# The word generated by your model
output = 'varable'

# The same example written as explicit character lists:
# reference = ['v','a','r','i','a','b','l','e']
# output = ['v','a','r','a','b','l','e']

#compute BLEU-4 score
def compute_bleu(output, reference):
    """Score ``output`` against a single ``reference`` with smoothed BLEU.

    Both arguments are sequences of tokens (a plain string is treated as a
    sequence of characters). A reference of exactly three tokens is scored
    with three n-gram weights of 0.33; every other length uses the four
    0.25 weights of BLEU-4. ``SmoothingFunction.method1`` is applied.

    Returns the value produced by ``sentence_bleu``.
    """
    three_gram_weights = (0.33, 0.33, 0.33)
    four_gram_weights = (0.25, 0.25, 0.25, 0.25)
    weights = three_gram_weights if len(reference) == 3 else four_gram_weights
    smoother = SmoothingFunction()
    return sentence_bleu(
        [reference],
        output,
        weights=weights,
        smoothing_function=smoother.method1,
    )
# Sanity check: score the example prediction against its reference word.
compute_bleu(output,reference)

0.5154486831107657

In [4]:
#Encoder
class EncoderRNN(nn.Module):
    """Embedding followed by a 4-layer LSTM that encodes one input sequence.

    Args:
        input_size: number of distinct token ids the embedding accepts.
        hidden_size: width of the embedding and of every LSTM layer.
    """

    def __init__(self, input_size, hidden_size):
        super(EncoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size=1
        self.num_layers=4

        self.embedding = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size,self.num_layers,dropout=0.2)

    def forward(self, input, hidden):
        """Encode ``input`` and return the final LSTM state ``(hn, cn)``.

        Args:
            input: tensor of token ids; it is embedded and reshaped to
                ``(seq_len, 1, hidden_size)`` before entering the LSTM.
            hidden: accepted for interface compatibility but not used.

        Returns:
            ``(hn, cn)``, each of shape ``(num_layers, 1, hidden_size)``.
        """
        # Reshape with the module's own hidden size rather than the notebook
        # global, so an encoder built with any width works.
        embedded = self.embedding(input).view(-1, 1, self.hidden_size)

        # The initial LSTM state is drawn from a standard normal on every call,
        # then moved to wherever the input lives (no dependency on a global device).
        # NOTE(review): a random initial state makes inference non-deterministic;
        # confirm this is intended rather than zeros.
        state_shape = (self.num_layers, self.batch_size, self.hidden_size)
        h0 = torch.randn(*state_shape).to(input.device)
        c0 = torch.randn(*state_shape).to(input.device)

        _, (hn, cn) = self.lstm(embedded, (h0, c0))
        return hn, cn

    def initHidden(self):
        """Return a zero tensor of shape ``(1, 1, hidden_size)`` on the module's device."""
        return torch.zeros(1, 1, self.hidden_size, device=self.embedding.weight.device)

In [5]:
#Decoder
class DecoderRNN(nn.Module):
    """Single-step decoder: embedding -> ReLU -> 4-layer LSTM -> linear layer.

    Args:
        hidden_size: width of the embedding and of every LSTM layer.
        output_size: number of token ids; sizes the embedding table and the
            final linear layer.
    """

    def __init__(self, hidden_size, output_size):
        super(DecoderRNN, self).__init__()
        self.hidden_size = hidden_size
        self.batch_size = 1
        self.num_layers = 4
        self.embedding = nn.Embedding(output_size, hidden_size)
        self.lstm = nn.LSTM(
            input_size=hidden_size,
            hidden_size=hidden_size,
            num_layers=self.num_layers,
            dropout=0.2,
        )
        self.out = nn.Linear(hidden_size, output_size)
        # Constructed but never applied in forward(), which returns the raw
        # outputs of the linear layer.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, input, hn,cn):
        """Decode one token.

        Args:
            input: tensor holding a single token id.
            hn, cn: LSTM hidden and cell state from the previous step.

        Returns:
            ``(scores, hn, cn)`` where ``scores`` has shape ``(1, output_size)``
            and ``hn``/``cn`` are the updated LSTM state.
        """
        step_input = F.relu(self.embedding(input).view(1, 1, -1))
        lstm_out, (hn, cn) = self.lstm(step_input, (hn, cn))
        scores = self.out(lstm_out[0])
        return scores, hn, cn

    def initHidden(self):
        """Return a zero tensor of shape ``(1, 1, hidden_size)`` on ``device``."""
        shape = (1, 1, self.hidden_size)
        return torch.zeros(*shape, device=device)

In [6]:
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, max_length=20):
    """Run one optimisation step on a single (input, target) pair.

    The encoder's final state seeds the decoder, which is then unrolled for up
    to ``target_tensor.size(0)`` steps starting from ``SOS_token``. With
    probability ``teacher_forcing_ratio`` the ground-truth token is fed back at
    every step; otherwise the decoder's own top prediction is fed back and
    decoding stops early once it emits ``EOS_token``.

    Args:
        input_tensor: token ids of the source word.
        target_tensor: token ids of the target word; must not be empty.
        encoder, decoder: the two modules being trained.
        encoder_optimizer, decoder_optimizer: optimizers stepped once each.
        criterion: loss applied to each step's scores and target token.
        max_length: kept for interface compatibility; not used.

    Returns:
        The summed step losses divided by the target length, as a float.

    Raises:
        ValueError: if ``target_tensor`` is empty (no loss can be computed).
    """
    target_length = target_tensor.size(0)
    if target_length == 0:
        # Without this guard the accumulated loss stays a plain int and
        # ``loss.backward()`` fails with an opaque AttributeError.
        raise ValueError("target_tensor must contain at least one token")

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    #----------sequence to sequence part for encoder----------#
    encoder_hidden, encoder_cell = encoder(input_tensor, encoder.initHidden())

    decoder_input = torch.tensor([[SOS_token]], device=device)
    decoder_hidden = encoder_hidden
    decoder_cell = encoder_cell

    # Decide once per example whether to feed the targets or the predictions.
    use_teacher_forcing = random.random() < teacher_forcing_ratio

    #----------sequence to sequence part for decoder----------#
    loss = 0
    for di in range(target_length):
        decoder_output, decoder_hidden, decoder_cell = decoder(
            decoder_input, decoder_hidden, decoder_cell)
        loss += criterion(decoder_output.squeeze(), target_tensor[di])

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input.
            decoder_input = target_tensor[di]
        else:
            # Free running: feed the model's own best guess, detached from
            # the graph so gradients do not flow through the choice.
            topv, topi = decoder_output.topk(1)
            decoder_input = topi.squeeze().detach()
            if decoder_input.item() == EOS_token:
                break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length

In [7]:
def asMinutes(s):
    """Format a duration of ``s`` seconds as ``'<minutes>m <seconds>s'``."""
    whole_minutes = math.floor(s / 60)
    leftover_seconds = s - whole_minutes * 60
    return '%dm %ds' % (whole_minutes, leftover_seconds)

In [8]:
def timeSince(since, percent):
    """Describe elapsed and estimated remaining time.

    Args:
        since: start timestamp as returned by ``time.time()``.
        percent: fraction of the work completed so far; the total duration is
            extrapolated as ``elapsed / percent``.

    Returns:
        A string of the form ``'<elapsed> (- <remaining>)'`` with both parts
        formatted by ``asMinutes``.
    """
    elapsed = time.time() - since
    remaining = elapsed / (percent) - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))

In [9]:
import matplotlib.pyplot as plt
def trainIters(encoder, decoder, n_iters, print_every=1000, plot_every=100, learning_rate=0.01, n_epochs=40):
    """Train the encoder/decoder on ``train_pair`` and plot the loss per epoch.

    Each epoch visits the first ``n_iters`` pairs of ``train_pair`` in order,
    calling ``train`` once per pair with a pair of SGD optimizers.

    Args:
        encoder, decoder: modules to train.
        n_iters: number of training pairs visited per epoch.
        print_every: print the running average loss every this many pairs.
        plot_every: kept for interface compatibility; not used, because the
            curve holds one point per epoch.
        learning_rate: learning rate of both SGD optimizers.
        n_epochs: number of passes over the data (previously fixed at 40).
    """
    start = time.time()
    plot_losses = []
    print_loss_total = 0  # Reset every print_every

    encoder_optimizer = optim.SGD(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.SGD(decoder.parameters(), lr=learning_rate)
    # your own dataloader
    training_pairs = train_pair

    criterion = nn.CrossEntropyLoss()
    for epoch in range(n_epochs):
        # Reset per epoch so each plotted point is that epoch's mean loss
        # rather than a running total over all epochs so far.
        epoch_loss_total = 0
        for step in range(1, n_iters + 1):
            training_pair = training_pairs[step - 1]
            input_tensor = training_pair[0]
            target_tensor = training_pair[1]

            loss = train(input_tensor, target_tensor, encoder,
                         decoder, encoder_optimizer, decoder_optimizer, criterion)

            print_loss_total += loss
            epoch_loss_total += loss

            if step % print_every == 0:
                print_loss_avg = print_loss_total / print_every
                print_loss_total = 0
                print("Now is epoch:",epoch)
                print('%s (%d %d%%) %.4f' % (timeSince(start, step / n_iters),
                                             step, step / n_iters * 100, print_loss_avg))
        # max() only guards the degenerate n_iters == 0 case.
        plot_losses.append(epoch_loss_total / max(n_iters, 1))

    print('Finished Training Trainset')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.plot(plot_losses)
    # NOTE(review): the import cell switches matplotlib to the 'agg' backend,
    # which presumably keeps show() from displaying anything; confirm.
    plt.show()

In [10]:
def test(input_tensor, target_tensor, encoder, decoder):
    """Greedily decode a prediction for ``input_tensor``.

    The decoder starts from ``SOS_token`` and is unrolled for at most
    ``target_tensor.size(0)`` steps, feeding back its top-scoring token each
    time and stopping early if that token equals ``EOS_token``.

    Both modules are switched to evaluation mode (so LSTM dropout is disabled)
    and gradients are not recorded while decoding; the modules' previous
    training/eval mode is restored before returning.

    Args:
        input_tensor: token ids of the source word.
        target_tensor: token ids of the target word; only its length is used.
        encoder, decoder: the trained modules.

    Returns:
        The predicted characters as a list, each being ``chr(token_id + 96)``.
    """
    encoder_was_training = encoder.training
    decoder_was_training = decoder.training
    encoder.eval()
    decoder.eval()

    predicted = []
    try:
        with torch.no_grad():
            #----------sequence to sequence part for encoder----------#
            decoder_hidden, decoder_cell = encoder(input_tensor, encoder.initHidden())

            decoder_input = torch.tensor([[SOS_token]], device=device)
            for _ in range(target_tensor.size(0)):
                decoder_output, decoder_hidden, decoder_cell = decoder(
                    decoder_input, decoder_hidden, decoder_cell)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()

                token_id = decoder_input.item()
                if token_id == EOS_token:
                    break
                predicted.append(chr(token_id + 96))
    finally:
        # Leave the modules in whatever mode the caller had them in.
        encoder.train(encoder_was_training)
        decoder.train(decoder_was_training)
    return predicted
import sys 


def _decode_tokens(token_tensor):
    """Turn a tensor of token ids into a list of characters (``chr(id + 96)``)."""
    return [chr(int(token_id) + 96) for token_id in token_tensor]


def _write_bleu_report(pairs, encoder, decoder, n_iters, report_path, score_label):
    """Decode the first ``n_iters`` pairs and write predictions plus mean BLEU to a file."""
    total = 0
    # The context manager closes the report even if decoding raises, and the
    # process-wide sys.stdout is never replaced.
    with open(report_path, "w") as report:
        for index in range(n_iters):
            pair = pairs[index]
            input_tensor = pair[0]
            target_tensor = pair[1]
            x = _decode_tokens(input_tensor)
            y = _decode_tokens(target_tensor)

            tar = test(input_tensor, target_tensor, encoder, decoder)
            total += compute_bleu(tar, y)
            print("input: ", "".join(x), file=report)
            print("target: ", "".join(y), file=report)
            print("pred: ", "".join(tar), file=report)
            print("-----------------------------------", file=report)
            print("-----------------------------------", file=report)
        # Average over the pairs actually evaluated instead of a fixed 50.
        average = total / n_iters if n_iters else 0.0
        print(score_label + str(average), file=report)


def testIters(encoder, decoder,n_iters):
    """Evaluate the first ``n_iters`` pairs of ``test_pair`` and write the report to test.txt.

    For every pair the input, target and predicted words are written, followed
    by the mean ``compute_bleu`` score over the evaluated pairs.
    """
    _write_bleu_report(test_pair, encoder, decoder, n_iters,
                       "test.txt", "BLEU-4 score of the test: ")


def newtestIters(encoder, decoder,n_iters):
    """Evaluate the first ``n_iters`` pairs of ``new_test_pair`` and write the report to new_test.txt.

    For every pair the input, target and predicted words are written, followed
    by the mean ``compute_bleu`` score over the evaluated pairs.
    """
    _write_bleu_report(new_test_pair, encoder, decoder, n_iters,
                       "new_test.txt", "BLEU-4 score of the new test: ")
        

In [11]:
#---------------- Load data ----------------#
import json
import pprint

SOS_token = 0
# NOTE(review): vocab_size is 28, so model outputs span ids 0..27 and can never
# equal 28; the targets built below also never contain this id. Confirm the
# intended end-of-sequence id.
EOS_token = 28
max_length=20


def _encode_word(word):
    """Encode a word as an int64 tensor on ``device``, one id per character (``ord(ch) - 96``)."""
    return torch.tensor([ord(ch) - 96 for ch in word], dtype=torch.int64).to(device)


def _load_pairs(path):
    """Read a JSON file of records and return a list of ``[input_tensor, target_tensor]`` pairs.

    Each record supplies several words under ``"input"`` and one word under
    ``"target"``; every input word is paired with that record's target.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(path) as handle:
        records = json.load(handle)

    pairs = []
    for record in records:
        # The target is the same for every input of a record, so encode it once.
        target = _encode_word(record["target"])
        for word in record["input"]:
            pairs.append([_encode_word(word), target])
    return pairs


train_pair = _load_pairs("./train.json")
test_pair = _load_pairs("./test.json")
new_test_pair = _load_pairs("./new_test.json")

In [12]:
# Build the encoder and decoder with the configured sizes and move them to `device`.
encoder1 = EncoderRNN(vocab_size, hidden_size).to(device)
decoder1 = DecoderRNN(hidden_size, vocab_size).to(device)
# Visit the first 12925 training pairs per epoch and print the running loss
# once per epoch (at pair 12924).
# NOTE(review): no learning_rate is passed, so trainIters uses its own default
# of 0.01 and not the LR = 0.05 set in the hyper-parameter cell; confirm which is intended.
trainIters(encoder1, decoder1, 12925, print_every=12924)

Now is epoch: 0
4m 42s (- 0m 0s) (12924 99%) 2.0195
Now is epoch: 1
9m 41s (- 0m 0s) (12924 99%) 1.1506
Now is epoch: 2
14m 54s (- 0m 0s) (12924 99%) 0.8388
Now is epoch: 3
20m 7s (- 0m 0s) (12924 99%) 0.6984
Now is epoch: 4
25m 19s (- 0m 0s) (12924 99%) 0.5988
Now is epoch: 5
30m 31s (- 0m 0s) (12924 99%) 0.5247
Now is epoch: 6
35m 43s (- 0m 0s) (12924 99%) 0.4653
Now is epoch: 7
40m 55s (- 0m 0s) (12924 99%) 0.4084
Now is epoch: 8
46m 8s (- 0m 0s) (12924 99%) 0.3624
Now is epoch: 9
51m 20s (- 0m 0s) (12924 99%) 0.3270
Now is epoch: 10
56m 32s (- 0m 0s) (12924 99%) 0.2904
Now is epoch: 11
61m 45s (- 0m 0s) (12924 99%) 0.2675
Now is epoch: 12
66m 57s (- 0m 0s) (12924 99%) 0.2420
Now is epoch: 13
71m 56s (- 0m 0s) (12924 99%) 0.2241
Now is epoch: 14
76m 52s (- 0m 0s) (12924 99%) 0.2094


In [None]:
# Evaluate the first 50 pairs of test_pair; the report is written to test.txt.
testIters(encoder1, decoder1,50)

In [None]:
# Evaluate the first 50 pairs of new_test_pair; the report is written to new_test.txt.
newtestIters(encoder1, decoder1,50)