In [None]:
import re
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
    
import torch
from torch.utils import data

import math
from tqdm import tqdm
import time

import pandas as pd

In [2]:
# device = torch.device("cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [3]:
SOS_token = 0
EOS_token = 1
PAD_token = 2
UNK_TOKEN = 3
CONTENT_MAX_LENGTH = 100
TITLE_MAX_LENGTH = 8

class Vocab:
    def __init__(self, name):
        self.name = name
        self.word2index = {"SOS": 0, "EOS": 1, "PAD": 2, 'UNK':3}
        self.index2word = {0: "SOS", 1: "EOS", 2: "PAD", 3: 'UNK'}
        self.n_words = 3
        self.word2count = {}
    
    def add_sentence(self, sentence):
        for word in sentence.lower().split():
            self.add_word(word)
    
    def add_word(self, word):
        if word not in self.word2index.keys():
            self.word2index[word] = self.n_words
            self.index2word[self.n_words] = word
            self.word2count[word] = 1
            self.n_words += 1
        else:
            self.word2count[word] += 1
    
    def to_json(self, file_path):
        pass
    
    def read_from_json(self, file_path):
        pass

In [4]:
train_df = pd.read_csv('../../courses/cse_842/bbc_data/train_split.csv')
test_df = pd.read_csv('../../courses/cse_842/bbc_data/test_split.csv')
train_df.shape, test_df.shape

((1977, 4), (349, 4))

In [5]:
train_df.head()

Unnamed: 0,file_path,class,title,content
0,/media/kuldeep/Work/college_stuff/courses/cse_...,entertainment,Elton plays Paris charity concert,Sir Elton John has performed at a special conc...
1,/media/kuldeep/Work/college_stuff/courses/cse_...,politics,Defiant hunts put ban to the test,Thousands of hunt supporters have been out on ...
2,/media/kuldeep/Work/college_stuff/courses/cse_...,sport,Injury doubts beset Wales squad,Wales have a clutch of injury worries before W...
3,/media/kuldeep/Work/college_stuff/courses/cse_...,business,Bombardier chief to leave company,Shares in train and plane-making giant Bombard...
4,/media/kuldeep/Work/college_stuff/courses/cse_...,entertainment,EastEnders 'is set for US remake',Plans to create a US soap based on the BBC's E...


In [6]:
def normalize_string(s):
    s = s.lower().strip()
    s = re.sub(r"([.!?])", r" \1", s)
    s = re.sub(r"[^a-zA-Z.!?]+", r" ", s)
    return s


def prepare_data(
    file_path, context_max_length=CONTENT_MAX_LENGTH, 
    title_max_length=TITLE_MAX_LENGTH
):
    df = pd.read_csv(file_path)
    pairs = []
    for _, row in df.iterrows():
        pairs.append(
            [
                row['title'], row['content']
            ]
        )
    print("{} titles and content read.".format(len(pairs)))
    pairs = [[normalize_string(p[0]), normalize_string(p[1])] for p in pairs]

    return pairs

def populate_vocab(vocab, pairs):
    for ti, co in pairs:
        vocab.add_sentence(co)
    return

In [7]:
train_pairs = prepare_data('../../courses/cse_842/bbc_data/train_split.csv')
print(random.choice(train_pairs))

1977 titles and content read.
['mallon wades into ne vote battle', 'middlesbrough mayor ray mallon has been drafted in to boost the yes campaign as the north east assembly referendum enters its final week . the former police chief dubbed robocop for his zero tolerance style clashed on thursday with sunderland no campaigner neil herron . mr mallon said an assembly would give local people more of a say over key issues such as transport and crime . but mr herron said north east people did not want or need an assembly . the pair met on the platform at sunderland station as mr mallon toured the region highlighting claimed improvements to transport if the area gets an assembly . but mr herron who gained fame as one of sunderland s metric martyrs and is running his own no campaign alongside the official north east says no campaign said he was not convinced by mr mallon s arguments . the reality is that it is not going to deliver he said . labour has had two and a half years to convince people

In [8]:
class Dataset(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, pairs, vocab, max_len_title, max_len_content):
        'Initialization'
        self.pairs = pairs
        self.max_len_title = max_len_title
        self.max_len_content = max_len_content
        self.vocab = vocab
        self.input_content = [tensorFromSentence(self.vocab, inp[1], self.max_len_content) for inp in self.pairs]
        self.output_title = [tensorFromSentence(self.vocab, inp[0], self.max_len_title) for inp in self.pairs]

    def __len__(self):
        'Denotes the total number of samples'
        return len(self.pairs)

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        # Load data and get label
        X = self.input_content[index]
        y = self.output_title[index]

        return X, y

def indexesFromSentence(vocab, sentence, max_len):
    l = [vocab.word2index.get(word, vocab.word2index['UNK']) for word in sentence.split()]
    if len(l) > max_len - 2:
        l = l[:max_len-2]
    
    l = [0] + l + [1] 
    if len(l) < max_len:
        for i in range(len(l), max_len):
            l.append(vocab.word2index["PAD"])
    
    return l


def tensorFromSentence(vocab, sentence, max_len):
    indexes = indexesFromSentence(vocab, sentence, max_len)
    return torch.tensor(indexes, dtype=torch.long, device=device).view(-1, 1)

In [14]:
class EncoderRnn(nn.Module):
    def __init__(self, vocab_size, embedding_dim, enc_hidden_state_size, dec_hidden_state_size, num_layers, dropout=0.5,
                bidirectional=True):
        super(EncoderRnn, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.enc_hidden_state_size = enc_hidden_state_size
        self.dec_hidden_state_size = dec_hidden_state_size
        self.num_layers=num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        
        self.rnn = nn.GRU(input_size=self.embedding_dim, hidden_size=self.enc_hidden_state_size,
                          num_layers=self.num_layers, dropout=self.dropout, bidirectional=self.bidirectional)
        
        self.combined_context_layer = nn.Linear(self.enc_hidden_state_size * 2, self.dec_hidden_state_size)
        
        self.dropout_layer = nn.Dropout(self.dropout)
    
    def forward(self, inp):        
        # inp = [sent_length, batch_size]

        embedded = self.dropout_layer(self.embedding(inp))
        # embedded = [sent_length, batch_size, embedding_dim]
        outputs, hidden = self.rnn(embedded)
        # outputs = [seq_len, batch, num_directions * hidden_size]
        # hidden = [num_layers*num_directions, batch, hidden_size]

        combined_context = torch.tanh(self.combined_context_layer(torch.cat((hidden[-2, : ,:], hidden[-1, :, :]), dim=1)))
        # combined_context = [batch_size, dec_hidden_state_size]
        
        return outputs, combined_context

Attention Layer

In [15]:
class AttentionLayer(nn.Module):
    def __init__(self, dec_hidden_state_size, enc_hidden_state_size):
        super(AttentionLayer, self).__init__()
        self.dec_hidden_state_size = dec_hidden_state_size
        self.enc_hidden_state_size = enc_hidden_state_size
        
        self.attn = nn.Linear((2 * self.enc_hidden_state_size) + self.dec_hidden_state_size, self.dec_hidden_state_size)
        
        self.v = nn.Parameter(torch.rand(self.dec_hidden_state_size))
        
    def forward(self, hidden, enc_outputs):
        # hidden = [batch_size, dec_hidden_state_size]
        # enc_outputs = [src_sent_len, batch_size, enc_hidden_state_size*2]
        
        batch_size = hidden.shape[0]
        src_seq_len = enc_outputs.shape[0]
        
        # calculating the energy 
        hidden = hidden.unsqueeze(1).repeat(1, src_seq_len, 1)
        enc_outputs = enc_outputs.permute(1, 0, 2)
        # hidden = [batch_size, src_seq_len, dec_hidden_state_size]
        # enc_outputs = [batch_size, src_seq_len, 2*enc_hidden_state_size]
        
        energy = torch.tanh(self.attn(torch.cat((hidden, enc_outputs), dim=2)))
        # energy = [batch_size, src_seq_len, dec_hidden_state_size]
        # v = [dec_hidden_state_size]
        
        energy = energy.permute(0, 2, 1)
        # energy = [batch_size, dec_hidden_state_size, src_seq_len]
        
        v = self.v.repeat(batch_size, 1).unsqueeze(1)
        
        attn = torch.bmm(v, energy)
        # attn = [batch_size, 1, src_seq_len]
        
        attn = attn.squeeze(1)
        # attn = [batch_size, src_seq_len]

        return F.softmax(attn, dim=1)

Decoder Unit

In [16]:
class DecoderRnn(nn.Module):
    def __init__(self, vocab_size, embedding_dim, enc_hidden_state_size, 
                 dec_hidden_state_size, num_layers=1, dropout=0.5, 
                 bidirectional=False):
        
        super(DecoderRnn, self).__init__()
        self.vocab_size = vocab_size
        self.embedding_dim = embedding_dim
        self.enc_hidden_state_size = enc_hidden_state_size
        self.dec_hidden_state_size = dec_hidden_state_size
        self.num_layers = num_layers
        self.dropout = dropout
        self.bidirectional = bidirectional
        
        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
        
        self.attn_layer = AttentionLayer(self.dec_hidden_state_size, self.enc_hidden_state_size)
        
        self.rnn = nn.GRU(input_size=(2 * self.enc_hidden_state_size) + self.embedding_dim, hidden_size=self.dec_hidden_state_size,
                          num_layers=self.num_layers, dropout=self.dropout, bidirectional=self.bidirectional)
        
        self.linear_layer = nn.Linear((2 * self.enc_hidden_state_size) + self.embedding_dim + self.dec_hidden_state_size, self.vocab_size)
        
        self.dropout_layer = nn.Dropout(self.dropout)
        
    def forward(self, inp, dec_hidden_state, enc_outputs):
        # inp = [batch_size]
        # dec_hidden_state = [batch_size, dec_hidden_state]
        # enc_outputs = [src_seq_len, batch_size, 2*enc_hidden_state]
        
        inp = inp.unsqueeze(0)
        embedded = self.dropout_layer(self.embedding(inp))
        # embedded = [1, batch_size, embedding_dim]
        
        attn_weights = self.attn_layer(dec_hidden_state, enc_outputs).unsqueeze(1)
        # attn_weights = [batch_size, 1, src_seq_len]
        
        enc_outputs = enc_outputs.permute(1, 0, 2)
        # enc_outputs = [batch_size, src_seq_len, 2*embedding_dim]
        
        weighted = torch.bmm(attn_weights, enc_outputs).squeeze(1).unsqueeze(0)
        # weighted = [1, batch_size, 2*embedding_dim]
        
        rnn_input = torch.cat((embedded, weighted), dim=2)
        # rnn_input = [1, batch_size, 2*enc_hidden_state_size + embedding_dim]
        
        dec_outputs, dec_hidden_state = self.rnn(rnn_input, dec_hidden_state.unsqueeze(0))
        # dec_outputs == dec_hidden_state
        
        # dec_outputs = [1, batch_size, dec_hidden_state_size]
        # dec_hidden_state = [1, batch_size, dec_hidden_state_size]
        
        linear_layer_input = torch.cat((embedded.squeeze(0), weighted.squeeze(0), dec_outputs.squeeze(0)), dim=1)
        # linear_layer_input = [batch_size, 2*enc_hidden_state_size + embedding_dim + dec_hidden_state_size]
        
        outputs = self.linear_layer(linear_layer_input)
        # outputs = [batch_size, vocab_size]
        
        dec_hidden_state = dec_hidden_state.squeeze(0)
        
        return outputs, dec_hidden_state

Seq2Seq

In [17]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, trg_sos_idx, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.trg_sos_idx = trg_sos_idx
        self.device = device
        
    def forward(self, src, trg, teacher_forcing_ratio=0.5):
        # src = [src_seq_len, batch_size]
        # trg = [trg_seq_len, batch_size]
        
        batch_size = src.shape[1]
        trg_seq_len = trg.shape[0]
        
        # final outputs from decoder
        final_outputs = torch.zeros((trg_seq_len, batch_size, self.decoder.vocab_size)).to(self.device)
        # setting first output as sos
        final_outputs[0, :, self.trg_sos_idx] = 0.98
        
        # encoder outputs
        enc_outputs, enc_hidden = self.encoder(src)
        dec_hidden_state = enc_hidden
        # print(enc_hidden.shape)
        dec_input = trg[0, :]
        for t in range(1, trg_seq_len):
            dec_outputs, dec_hidden_state = self.decoder(dec_input, dec_hidden_state, enc_outputs)
            final_outputs[t, :, :] = dec_outputs
            
            teacher_force = random.random() < teacher_forcing_ratio
            if teacher_force:
                dec_input = trg[t, :]
            else:
                dec_input = dec_outputs.max(1)[1]
        
        return final_outputs

In [18]:
train_pairs = prepare_data('../../courses/cse_842/bbc_data/train_split.csv')
test_pairs = prepare_data('../../courses/cse_842/bbc_data/test_split.csv')

1977 titles and content read.
349 titles and content read.


In [19]:
vocab = Vocab('title_content')
populate_vocab(vocab, train_pairs)

In [20]:
EMBEDDING_DIM = 50
HIDDEN_DIM = 128
VOCAB_SIZE = vocab.n_words
NUM_LAYERS_ENCODER = 3
NUM_LAYERS_DECODER = 1

# encoder = EncoderRnn(
#     hidden_size=HIDDEN_DIM, embedding_dim=EMBEDDING_DIM, num_layers=NUM_LAYERS,
#     vocab_size=VOCAB_SIZE
# )

# decoder = DecodeRnn(
#     vocab_size=VOCAB_SIZE, hidden_size=HIDDEN_DIM, embedding_dim=EMBEDDING_DIM, num_layers=NUM_LAYERS
# )

# s2s = Seq2Seq(
#     encoder, decoder, device
# )

encoder = EncoderRnn(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    enc_hidden_state_size=HIDDEN_DIM,
    dec_hidden_state_size=HIDDEN_DIM,
    num_layers=NUM_LAYERS_ENCODER,
    dropout=0.2,
    bidirectional=True
)

decoder = DecoderRnn(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    enc_hidden_state_size=HIDDEN_DIM,
    dec_hidden_state_size=HIDDEN_DIM,
    num_layers=NUM_LAYERS_DECODER,
    dropout=0.2,
    bidirectional=False
)

model = Seq2Seq(
    encoder, decoder, 0, device
)



In [21]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 15,111,284 trainable parameters


In [22]:
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=2)

In [23]:
model = model.to(device)
criterion = criterion.to(device)

In [40]:
params = {'batch_size': 16,
          'shuffle': True,
#           'num_workers': 6,
          }

# Generators
training_set = Dataset(
    train_pairs, vocab=vocab, max_len_title=TITLE_MAX_LENGTH, 
    max_len_content=CONTENT_MAX_LENGTH
)
training_generator = data.DataLoader(training_set, **params)

val_set = Dataset(
    test_pairs, vocab=vocab, max_len_title=TITLE_MAX_LENGTH, 
    max_len_content=CONTENT_MAX_LENGTH
)
val_generator = data.DataLoader(val_set, **params)

In [25]:
len(training_generator), len(val_generator)

(124, 22)

In [26]:
def train(model, iterator, optimizer, criterion, clip, batch_size, device, teacher_forcing_ratio=0.25):
    model.train()
    
    epoch_loss = 0
    
    for i, batch in tqdm(enumerate(iterator)):
        
        src = batch[0].permute(1,0,2).squeeze(-1).to(device).contiguous()
        trg = batch[1].permute(1,0,2).squeeze(-1).to(device).contiguous()
        optimizer.zero_grad()
        output = model(src, trg, teacher_forcing_ratio=teacher_forcing_ratio)
        
        #trg = [trg sent len, batch size]
        #output = [trg sent len, batch size, output dim]
        output = output[1:].view(-1, output.shape[-1])
        trg = trg[1:].view(-1)
        
        #trg = [(trg sent len - 1) * batch size]
        #output = [(trg sent len - 1) * batch size, output dim]
        
        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)


def evaluate(model, iterator, optimizer, criterion, clip, batch_size, device, teacher_forcing_ratio=0.2):
    model.eval()
    epoch_loss = 0
    with torch.no_grad():
    
        for i, batch in tqdm(enumerate(iterator)):
            src = batch[0].permute(1,0,2).squeeze(-1).to(device).contiguous()
            trg = batch[1].permute(1,0,2).squeeze(-1).to(device).contiguous()

            output = model(src, trg, teacher_forcing_ratio) #turn off teacher forcing
            #trg = [trg sent len, batch size]
            #output = [trg sent len, batch size, output dim]

            output = output[1:].view(-1, output.shape[-1])
            trg = trg[1:].view(-1)
            #trg = [(trg sent len - 1) * batch size]
            #output = [(trg sent len - 1) * batch size, output dim]

            loss = criterion(output, trg)
            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [27]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs


In [33]:
N_EPOCHS = 50
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, training_generator, optimizer, criterion,
                       CLIP, params["batch_size"], device, teacher_forcing_ratio=0.5)
    valid_loss = evaluate(model, val_generator, optimizer, criterion,
                       CLIP, params["batch_size"], device, teacher_forcing_ratio=0.5)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'Epoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

124it [00:03, 35.29it/s]
22it [00:00, 127.29it/s]


Epoch: 01 | Time: 0m 3s
	Train Loss: 3.386 | Train PPL:  29.535
	 Val. Loss: 9.319 |  Val. PPL: 11148.428


124it [00:03, 35.63it/s]
22it [00:00, 129.63it/s]


Epoch: 02 | Time: 0m 3s
	Train Loss: 3.496 | Train PPL:  32.968
	 Val. Loss: 9.104 |  Val. PPL: 8993.780


124it [00:03, 35.60it/s]
22it [00:00, 129.42it/s]


Epoch: 03 | Time: 0m 3s
	Train Loss: 3.166 | Train PPL:  23.709
	 Val. Loss: 9.184 |  Val. PPL: 9739.081


124it [00:03, 35.44it/s]
22it [00:00, 113.36it/s]


Epoch: 04 | Time: 0m 3s
	Train Loss: 3.132 | Train PPL:  22.922
	 Val. Loss: 8.943 |  Val. PPL: 7657.616


124it [00:03, 34.92it/s]
22it [00:00, 131.56it/s]


Epoch: 05 | Time: 0m 3s
	Train Loss: 3.332 | Train PPL:  27.992
	 Val. Loss: 9.107 |  Val. PPL: 9014.059


124it [00:03, 35.49it/s]
22it [00:00, 129.22it/s]


Epoch: 06 | Time: 0m 3s
	Train Loss: 3.367 | Train PPL:  28.985
	 Val. Loss: 8.911 |  Val. PPL: 7416.323


124it [00:03, 35.21it/s]
22it [00:00, 129.94it/s]


Epoch: 07 | Time: 0m 3s
	Train Loss: 2.970 | Train PPL:  19.483
	 Val. Loss: 9.157 |  Val. PPL: 9476.103


124it [00:03, 35.40it/s]
22it [00:00, 130.04it/s]


Epoch: 08 | Time: 0m 3s
	Train Loss: 3.238 | Train PPL:  25.471
	 Val. Loss: 8.861 |  Val. PPL: 7050.525


124it [00:03, 35.50it/s]
22it [00:00, 138.22it/s]


Epoch: 09 | Time: 0m 3s
	Train Loss: 3.563 | Train PPL:  35.286
	 Val. Loss: 8.825 |  Val. PPL: 6802.725


124it [00:03, 35.60it/s]
22it [00:00, 138.37it/s]


Epoch: 10 | Time: 0m 3s
	Train Loss: 3.387 | Train PPL:  29.562
	 Val. Loss: 9.007 |  Val. PPL: 8161.307


124it [00:03, 35.66it/s]
22it [00:00, 136.37it/s]


Epoch: 11 | Time: 0m 3s
	Train Loss: 3.502 | Train PPL:  33.192
	 Val. Loss: 8.992 |  Val. PPL: 8035.211


124it [00:03, 35.63it/s]
22it [00:00, 140.27it/s]


Epoch: 12 | Time: 0m 3s
	Train Loss: 3.659 | Train PPL:  38.837
	 Val. Loss: 9.061 |  Val. PPL: 8614.363


124it [00:03, 35.63it/s]
22it [00:00, 137.61it/s]


Epoch: 13 | Time: 0m 3s
	Train Loss: 3.436 | Train PPL:  31.055
	 Val. Loss: 8.894 |  Val. PPL: 7285.521


124it [00:03, 35.63it/s]
22it [00:00, 138.89it/s]


Epoch: 14 | Time: 0m 3s
	Train Loss: 3.571 | Train PPL:  35.562
	 Val. Loss: 8.711 |  Val. PPL: 6068.131


124it [00:03, 35.57it/s]
22it [00:00, 138.65it/s]


Epoch: 15 | Time: 0m 3s
	Train Loss: 3.682 | Train PPL:  39.720
	 Val. Loss: 8.916 |  Val. PPL: 7451.178


124it [00:03, 35.55it/s]
22it [00:00, 136.52it/s]


Epoch: 16 | Time: 0m 3s
	Train Loss: 4.020 | Train PPL:  55.690
	 Val. Loss: 8.865 |  Val. PPL: 7076.822


124it [00:03, 35.57it/s]
22it [00:00, 140.50it/s]


Epoch: 17 | Time: 0m 3s
	Train Loss: 3.984 | Train PPL:  53.735
	 Val. Loss: 8.493 |  Val. PPL: 4879.797


124it [00:03, 35.51it/s]
22it [00:00, 137.93it/s]


Epoch: 18 | Time: 0m 3s
	Train Loss: 3.750 | Train PPL:  42.524
	 Val. Loss: 8.617 |  Val. PPL: 5525.470


124it [00:03, 35.56it/s]
22it [00:00, 138.73it/s]


Epoch: 19 | Time: 0m 3s
	Train Loss: 3.815 | Train PPL:  45.379
	 Val. Loss: 8.711 |  Val. PPL: 6067.193


124it [00:03, 35.57it/s]
22it [00:00, 138.72it/s]


Epoch: 20 | Time: 0m 3s
	Train Loss: 3.607 | Train PPL:  36.848
	 Val. Loss: 8.821 |  Val. PPL: 6774.141


124it [00:03, 35.49it/s]
22it [00:00, 137.64it/s]


Epoch: 21 | Time: 0m 3s
	Train Loss: 3.489 | Train PPL:  32.741
	 Val. Loss: 8.498 |  Val. PPL: 4905.851


124it [00:03, 35.55it/s]
22it [00:00, 139.08it/s]


Epoch: 22 | Time: 0m 3s
	Train Loss: 3.551 | Train PPL:  34.833
	 Val. Loss: 8.534 |  Val. PPL: 5084.304


124it [00:03, 35.51it/s]
22it [00:00, 136.63it/s]


Epoch: 23 | Time: 0m 3s
	Train Loss: 3.424 | Train PPL:  30.691
	 Val. Loss: 8.749 |  Val. PPL: 6302.800


124it [00:03, 35.49it/s]
22it [00:00, 139.93it/s]


Epoch: 24 | Time: 0m 3s
	Train Loss: 3.313 | Train PPL:  27.467
	 Val. Loss: 8.634 |  Val. PPL: 5620.830


124it [00:03, 35.51it/s]
22it [00:00, 139.23it/s]


Epoch: 25 | Time: 0m 3s
	Train Loss: 3.376 | Train PPL:  29.267
	 Val. Loss: 8.677 |  Val. PPL: 5868.113


124it [00:03, 35.47it/s]
22it [00:00, 136.97it/s]


Epoch: 26 | Time: 0m 3s
	Train Loss: 3.457 | Train PPL:  31.728
	 Val. Loss: 9.029 |  Val. PPL: 8343.025


124it [00:03, 35.49it/s]
22it [00:00, 138.84it/s]


Epoch: 27 | Time: 0m 3s
	Train Loss: 3.490 | Train PPL:  32.798
	 Val. Loss: 8.841 |  Val. PPL: 6910.922


124it [00:03, 35.54it/s]
22it [00:00, 137.44it/s]


Epoch: 28 | Time: 0m 3s
	Train Loss: 3.531 | Train PPL:  34.147
	 Val. Loss: 9.002 |  Val. PPL: 8120.961


124it [00:03, 35.54it/s]
22it [00:00, 138.57it/s]


Epoch: 29 | Time: 0m 3s
	Train Loss: 3.298 | Train PPL:  27.067
	 Val. Loss: 9.123 |  Val. PPL: 9164.715


124it [00:03, 35.52it/s]
22it [00:00, 138.81it/s]


Epoch: 30 | Time: 0m 3s
	Train Loss: 3.517 | Train PPL:  33.679
	 Val. Loss: 9.213 |  Val. PPL: 10023.898


124it [00:03, 35.53it/s]
22it [00:00, 137.13it/s]


Epoch: 31 | Time: 0m 3s
	Train Loss: 3.409 | Train PPL:  30.238
	 Val. Loss: 9.521 |  Val. PPL: 13644.034


124it [00:03, 35.41it/s]
22it [00:00, 126.61it/s]


Epoch: 32 | Time: 0m 3s
	Train Loss: 3.330 | Train PPL:  27.940
	 Val. Loss: 9.253 |  Val. PPL: 10431.093


124it [00:03, 35.48it/s]
22it [00:00, 135.55it/s]


Epoch: 33 | Time: 0m 3s
	Train Loss: 3.249 | Train PPL:  25.760
	 Val. Loss: 8.836 |  Val. PPL: 6875.268


124it [00:03, 35.51it/s]
22it [00:00, 139.81it/s]


Epoch: 34 | Time: 0m 3s
	Train Loss: 3.170 | Train PPL:  23.809
	 Val. Loss: 9.082 |  Val. PPL: 8799.428


124it [00:03, 35.54it/s]
22it [00:00, 136.68it/s]


Epoch: 35 | Time: 0m 3s
	Train Loss: 3.359 | Train PPL:  28.767
	 Val. Loss: 9.015 |  Val. PPL: 8229.067


124it [00:03, 35.52it/s]
22it [00:00, 138.21it/s]


Epoch: 36 | Time: 0m 3s
	Train Loss: 3.811 | Train PPL:  45.184
	 Val. Loss: 9.084 |  Val. PPL: 8816.577


124it [00:03, 35.48it/s]
22it [00:00, 139.59it/s]


Epoch: 37 | Time: 0m 3s
	Train Loss: 3.659 | Train PPL:  38.814
	 Val. Loss: 8.777 |  Val. PPL: 6481.394


124it [00:03, 35.48it/s]
22it [00:00, 136.74it/s]


Epoch: 38 | Time: 0m 3s
	Train Loss: 3.427 | Train PPL:  30.798
	 Val. Loss: 9.081 |  Val. PPL: 8788.785


124it [00:03, 35.52it/s]
22it [00:00, 140.31it/s]


Epoch: 39 | Time: 0m 3s
	Train Loss: 3.413 | Train PPL:  30.349
	 Val. Loss: 8.794 |  Val. PPL: 6595.836


124it [00:03, 35.45it/s]
22it [00:00, 139.06it/s]


Epoch: 40 | Time: 0m 3s
	Train Loss: 3.525 | Train PPL:  33.953
	 Val. Loss: 9.033 |  Val. PPL: 8379.125


124it [00:03, 35.41it/s]
22it [00:00, 137.18it/s]


Epoch: 41 | Time: 0m 3s
	Train Loss: 3.514 | Train PPL:  33.568
	 Val. Loss: 8.893 |  Val. PPL: 7277.476


124it [00:03, 35.47it/s]
22it [00:00, 139.09it/s]


Epoch: 42 | Time: 0m 3s
	Train Loss: 3.779 | Train PPL:  43.786
	 Val. Loss: 8.923 |  Val. PPL: 7501.898


124it [00:03, 35.48it/s]
22it [00:00, 138.37it/s]


Epoch: 43 | Time: 0m 3s
	Train Loss: 3.359 | Train PPL:  28.765
	 Val. Loss: 8.847 |  Val. PPL: 6952.353


124it [00:03, 35.52it/s]
22it [00:00, 139.15it/s]


Epoch: 44 | Time: 0m 3s
	Train Loss: 3.907 | Train PPL:  49.764
	 Val. Loss: 8.746 |  Val. PPL: 6282.871


124it [00:03, 35.49it/s]
22it [00:00, 138.63it/s]


Epoch: 45 | Time: 0m 3s
	Train Loss: 3.516 | Train PPL:  33.648
	 Val. Loss: 8.857 |  Val. PPL: 7019.961


124it [00:03, 35.50it/s]
22it [00:00, 136.69it/s]


Epoch: 46 | Time: 0m 3s
	Train Loss: 3.730 | Train PPL:  41.664
	 Val. Loss: 8.797 |  Val. PPL: 6616.988


124it [00:03, 35.50it/s]
22it [00:00, 139.77it/s]


Epoch: 47 | Time: 0m 3s
	Train Loss: 3.596 | Train PPL:  36.435
	 Val. Loss: 8.752 |  Val. PPL: 6324.862


124it [00:03, 35.51it/s]
22it [00:00, 136.86it/s]


Epoch: 48 | Time: 0m 3s
	Train Loss: 3.718 | Train PPL:  41.182
	 Val. Loss: 8.716 |  Val. PPL: 6101.591


124it [00:03, 35.47it/s]
22it [00:00, 138.36it/s]


Epoch: 49 | Time: 0m 3s
	Train Loss: 3.459 | Train PPL:  31.780
	 Val. Loss: 9.087 |  Val. PPL: 8840.035


124it [00:03, 35.57it/s]
22it [00:00, 139.24it/s]

Epoch: 50 | Time: 0m 3s
	Train Loss: 3.277 | Train PPL:  26.488
	 Val. Loss: 8.810 |  Val. PPL: 6702.877





In [34]:
train_loss, math.exp(valid_loss)

(3.276680499315262, 6702.8769846770065)

In [37]:
def generate_title(content, title, vocab, model, title_max_len, content_max_len):
    model.eval()
    src = tensorFromSentence(vocab, content, content_max_len)
    trg = tensorFromSentence(vocab, title, title_max_len)
    
    out = model(src, trg, teacher_forcing_ratio=0.5)
    out = F.softmax(out, dim=2)
    predictions = out.max(2)[1].view(-1)
    sentence = " ".join([vocab.index2word[w] for w in list(predictions.tolist())])
    print("       Input == {}".format(" ".join([vocab.index2word[w] for w in src.view(-1).tolist()])))
    print("Model Output == {}".format(sentence))
    print("Ground Truth == {}".format(" ".join([vocab.index2word[w] for w in trg.view(-1).tolist()])))
    return out

In [38]:
out = generate_title(train_pairs[10][1], train_pairs[10][0], vocab, model, title_max_len=TITLE_MAX_LENGTH, content_max_len=CONTENT_MAX_LENGTH)

       Input == SOS tony blair has told labour supporters he s back and still hungry for the job of prime minister but does that sum up the mood at the party s spring conference in gateshead ? the electorate are keener on the government than some labour party members is the dry assessment of graham lane leader of the labour group on newham council . the problem according to mr lane is not continuing divisions over iraq foundation hospitals or tuition fees or even voter apathy but mr blair himself . i have a new slogan . vote blair get brown EOS
Model Output == SOS us s maruti support takes stock EOS
Ground Truth == SOS labour s core support takes stock EOS


In [39]:
for p in test_pairs[:20]:
    generate_title(p[1], p[0], vocab, model, title_max_len=TITLE_MAX_LENGTH, content_max_len=CONTENT_MAX_LENGTH)
    print()

       Input == SOS terrorists might try to target the uk in the run up to the election london s most senior police officer has said . sir ian blair said terror groups would remember the effect of the madrid bomb on spain s general election last year . other potential targets were the royal wedding and the uk s presidency of the european union and g he said . he refused to say if there was specific information about the risk of a pre poll attack . no was similarly cautious but said the threat was real . the comments come EOS
Model Output == SOS us sir to pinochet plan EOS EOS
Ground Truth == SOS election could be terror target EOS PAD

       Input == SOS goals from gregory vignal and nacho novo gave rangers a scrappy victory at celtic park that moves them three points clear of the champions . rangers had rarely threatened until celtic goalkeeper sir douglas let defender vignal s yard drive slip through his grasp and into the net . opposite number ronald sir had been rangers hero savin

In [30]:
for p in test_pairs:
    print(p)

['rangers seal old firm win', 'goals from gregory vignal and nacho novo gave rangers a scrappy victory at celtic park that moves them three points clear of the champions . rangers had rarely threatened until celtic goalkeeper rab douglas let defender vignal s yard drive slip through his grasp and into the net . opposite number ronald waterreus had been rangers hero saving superbly from craig bellamy and john hartson . striker novo secured victory lobbing douglas with eight minutes remaining . it ended celtic s game unbeaten run at home in old firm derbies and gave rangers manager alex mcleish his first victory at the home of his glasgow rivals . celtic had won their last six meetings on their home pitch including twice already this season . they started confidently with new signing bellamy on loan from newcastle united given his celtic debut up front with wales international colleague john hartson and chris sutton dropping into midfield . it took bellamy just four minutes to threaten t