In [81]:
! git clone https://github.com/deepmind/narrativeqa.git

fatal: destination path 'narrativeqa' already exists and is not an empty directory.


In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('/content/narrativeqa/qaps.csv')
print(df.shape)
df.head()

(46765, 8)


Unnamed: 0,document_id,set,question,answer1,answer2,question_tokenized,answer1_tokenized,answer2_tokenized
0,0025577043f5090cd603c6aea60f26e236195594,test,Who is Mark Hunter?,He is a high school student in Phoenix.,A loner and outsider student with a radio stat...,Who is Mark Hunter ?,He is a high school student in Phoenix .,A loner and outsider student with a radio stat...
1,0025577043f5090cd603c6aea60f26e236195594,test,Where does this radio station take place?,It takes place in Mark's parents basement.,"Phoenix, Arizona",Where does this radio station take place ?,It takes place in Mark s parents basement .,"Phoenix , Arizona"
2,0025577043f5090cd603c6aea60f26e236195594,test,Why do more students tune into Mark's show?,Mark talks about what goes on at school and in...,Because he has a thing to say about what is ha...,Why do more students tune into Mark s show ?,Mark talks about what goes on at school and in...,Because he has a thing to say about what is ha...
3,0025577043f5090cd603c6aea60f26e236195594,test,Who commits suicide?,Malcolm.,Malcolm.,Who commits suicide ?,Malcolm .,Malcolm .
4,0025577043f5090cd603c6aea60f26e236195594,test,What does Paige jam into her microwave?,She jams her medals and accolades.,Her award medals,What does Paige jam into her microwave ?,She jams her medals and accolades .,Her award medals


In [3]:
df = df[['question', 'answer1']]
df.head()

Unnamed: 0,question,answer1
0,Who is Mark Hunter?,He is a high school student in Phoenix.
1,Where does this radio station take place?,It takes place in Mark's parents basement.
2,Why do more students tune into Mark's show?,Mark talks about what goes on at school and in...
3,Who commits suicide?,Malcolm.
4,What does Paige jam into her microwave?,She jams her medals and accolades.


In [4]:
! python3 -m spacy download en
# ! python3 -m spacy download de

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_sm')
[38;5;2m✔ Linking successful[0m
/usr/local/lib/python3.6/dist-packages/en_core_web_sm -->
/usr/local/lib/python3.6/dist-packages/spacy/data/en
You can now load the model via spacy.load('en')


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim

from torchtext.data import Field, BucketIterator
from torchtext import data

import spacy
import numpy as np

import random
import math
import time
from tqdm import tqdm

In [6]:
SEED = 1234

random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

In [7]:
spacy_en = spacy.load('en')

In [8]:
def tokenize_en(text):
    return [tok.text for tok in spacy_en.tokenizer(text)]

In [9]:
SRC = Field(tokenize = tokenize_en, init_token = '<sos>', eos_token = '<eos>', lower = True)
TRG = Field(tokenize = tokenize_en, init_token = '<sos>', eos_token = '<eos>', lower = True)

In [10]:
fields = [('question', SRC), ('answer1', TRG)]

In [11]:
example = [data.Example.fromlist([df.question[i],df.answer1[i]], fields) for i in range(df.shape[0])] 

In [12]:
NQDataset = data.Dataset(example, fields)

In [13]:
print(vars(NQDataset.examples[0]))

{'question': ['who', 'is', 'mark', 'hunter', '?'], 'answer1': ['he', 'is', 'a', 'high', 'school', 'student', 'in', 'phoenix', '.']}


In [14]:
train_data, valid_data, test_data = NQDataset.split(split_ratio=[0.7, 0.2, 0.1], random_state=random.seed(SEED))

In [15]:
SRC.build_vocab(train_data, min_freq = 2)
TRG.build_vocab(train_data, min_freq = 2)

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [17]:
BATCH_SIZE = 128
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data),
    sort=False,
    batch_size = BATCH_SIZE,
    device = device
)

In [18]:
# batch = next(iter(train_iterator))

In [19]:
# x = batch.trg[0]
# x

In [20]:
# TRG.vocab.stoi['good']

In [21]:
# x.unsqueeze(1)

In [22]:
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device
    
    def forward(self, src, trg, teacher_forcing_ratio = 0.5):
        
        trg_len = trg.shape[0]
        batch_size = trg.shape[1]
        trg_vocab_size = self.decoder.output_dim
        outputs = torch.zeros(trg_len, batch_size, trg_vocab_size).to(self.device)
        hidden, cell = self.encoder(src)
        input = trg[0, :]
        
        for t in range(1, trg_len):
            output, hidden, cell = self.decoder(input, hidden, cell)
            outputs[t] = output

            teacher_force = random.random() < teacher_forcing_ratio

            top1 = output.argmax(1)
            input = trg[t] if teacher_force else top1
        return outputs  


In [23]:
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()

        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.dropout = nn.Dropout(dropout)

    def forward(self, src):
        embedded = self.dropout(self.embedding(src))
        outputs, (hidden, cell) = self.rnn(embedded)
        return hidden, cell



In [24]:
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hid_dim, n_layers, dropout):
        super().__init__()
        self.output_dim = output_dim
        self.emb_dim = emb_dim
        self.hid_dim = hid_dim
        self.n_layers = n_layers

        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.rnn = nn.LSTM(emb_dim, hid_dim, n_layers, dropout = dropout)
        self.fc = nn.Linear(hid_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, input, hidden, cell):
        
        input = input.unsqueeze(0)
        embedded = self.dropout(self.embedding(input))
        output, (hidden, cell) = self.rnn(embedded, (hidden, cell))
        prediction = self.fc(output.squeeze(0))
        return prediction, hidden, cell



In [25]:
INPUT_DIM = len(SRC.vocab)
OUTPUT_DIM = len(TRG.vocab)
ENC_EMB_DIM = 256
DEC_EMB_DIM = 256
HID_DIM = 512
N_LAYERS = 2
ENC_DROPOUT = 0.5
DEC_DROPOUT = 0.5

enc = Encoder(INPUT_DIM, ENC_EMB_DIM, HID_DIM, N_LAYERS, ENC_DROPOUT)
dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, HID_DIM, N_LAYERS, DEC_DROPOUT)

model = Seq2Seq(enc, dec, device).to(device)

In [26]:
def init_weights(m):
    for name, param in m.named_parameters():
        nn.init.uniform_(param.data, -0.08, 0.08)
        
model.apply(init_weights)

Seq2Seq(
  (encoder): Encoder(
    (embedding): Embedding(11001, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (dropout): Dropout(p=0.5, inplace=False)
  )
  (decoder): Decoder(
    (embedding): Embedding(10651, 256)
    (rnn): LSTM(256, 512, num_layers=2, dropout=0.5)
    (fc): Linear(in_features=512, out_features=10651, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
  )
)

In [27]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(model):,} trainable parameters')

The model has 18,363,291 trainable parameters


In [28]:
optimizer = optim.Adam(model.parameters())

In [29]:
TRG_PAD_IDX = TRG.vocab.stoi[TRG.pad_token]

criterion = nn.CrossEntropyLoss(ignore_index = TRG_PAD_IDX)

In [30]:
def train(model, train_iterator, optimizer, criterion, clip):
    model.train()
    epoch_loss = 0
    for i, batch in enumerate(tqdm(train_iterator)):
        src, trg = batch.question, batch.answer1
        optimizer.zero_grad()
        output = model(src, trg)
        output_dim = output.shape[-1]
        output = output[1:].view(-1, output_dim)
        trg = trg[1:].view(-1)
        loss = criterion(output, trg)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(train_iterator)

In [31]:
def evaluate(model, iterator, criterion):
    
    model.eval()
    
    epoch_loss = 0
    
    with torch.no_grad():
    
        for i, batch in enumerate(tqdm(iterator)):

            src = batch.question
            trg = batch.answer1

            output = model(src, trg, 0) #turn off teacher forcing

            #trg = [trg len, batch size]
            #output = [trg len, batch size, output dim]

            output_dim = output.shape[-1]
            
            output = output[1:].view(-1, output_dim)
            trg = trg[1:].view(-1)

            #trg = [(trg len - 1) * batch size]
            #output = [(trg len - 1) * batch size, output dim]

            loss = criterion(output, trg)
            
            epoch_loss += loss.item()
        
    return epoch_loss / len(iterator)

In [32]:
def epoch_time(start_time, end_time):
    elapsed_time = end_time - start_time
    elapsed_mins = int(elapsed_time / 60)
    elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
    return elapsed_mins, elapsed_secs

In [33]:
N_EPOCHS = 10
CLIP = 1

best_valid_loss = float('inf')

for epoch in range(N_EPOCHS):
    
    start_time = time.time()
    
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion)
    
    end_time = time.time()
    
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), 'tut1-model.pt')
    
    print(f'\nEpoch: {epoch+1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}')
    print(f'\t Val. Loss: {valid_loss:.3f} |  Val. PPL: {math.exp(valid_loss):7.3f}')

100%|██████████| 256/256 [00:47<00:00,  5.35it/s]
100%|██████████| 37/37 [00:02<00:00, 12.98it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 01 | Time: 0m 50s
	Train Loss: 5.937 | Train PPL: 378.797
	 Val. Loss: 5.546 |  Val. PPL: 256.109


100%|██████████| 256/256 [00:48<00:00,  5.32it/s]
100%|██████████| 37/37 [00:02<00:00, 12.95it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 02 | Time: 0m 50s
	Train Loss: 5.576 | Train PPL: 264.096
	 Val. Loss: 5.496 |  Val. PPL: 243.774


100%|██████████| 256/256 [00:47<00:00,  5.35it/s]
100%|██████████| 37/37 [00:02<00:00, 12.99it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 03 | Time: 0m 50s
	Train Loss: 5.397 | Train PPL: 220.721
	 Val. Loss: 5.433 |  Val. PPL: 228.930


100%|██████████| 256/256 [00:48<00:00,  5.29it/s]
100%|██████████| 37/37 [00:02<00:00, 13.06it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 04 | Time: 0m 51s
	Train Loss: 5.244 | Train PPL: 189.367
	 Val. Loss: 5.391 |  Val. PPL: 219.450


100%|██████████| 256/256 [00:48<00:00,  5.32it/s]
100%|██████████| 37/37 [00:02<00:00, 12.88it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 05 | Time: 0m 50s
	Train Loss: 5.122 | Train PPL: 167.719
	 Val. Loss: 5.393 |  Val. PPL: 219.824


100%|██████████| 256/256 [00:48<00:00,  5.29it/s]
100%|██████████| 37/37 [00:02<00:00, 13.00it/s]
  0%|          | 1/256 [00:00<00:45,  5.58it/s]


Epoch: 06 | Time: 0m 51s
	Train Loss: 5.009 | Train PPL: 149.800
	 Val. Loss: 5.370 |  Val. PPL: 214.760


100%|██████████| 256/256 [00:48<00:00,  5.30it/s]
100%|██████████| 37/37 [00:02<00:00, 12.90it/s]
  0%|          | 0/256 [00:00<?, ?it/s]


Epoch: 07 | Time: 0m 51s
	Train Loss: 4.939 | Train PPL: 139.614
	 Val. Loss: 5.364 |  Val. PPL: 213.520


100%|██████████| 256/256 [00:48<00:00,  5.32it/s]
100%|██████████| 37/37 [00:02<00:00, 13.00it/s]
  0%|          | 1/256 [00:00<00:47,  5.34it/s]


Epoch: 08 | Time: 0m 50s
	Train Loss: 4.835 | Train PPL: 125.852
	 Val. Loss: 5.363 |  Val. PPL: 213.409


100%|██████████| 256/256 [00:48<00:00,  5.32it/s]
100%|██████████| 37/37 [00:02<00:00, 12.92it/s]
  0%|          | 1/256 [00:00<00:48,  5.21it/s]


Epoch: 09 | Time: 0m 50s
	Train Loss: 4.762 | Train PPL: 116.948
	 Val. Loss: 5.399 |  Val. PPL: 221.100


100%|██████████| 256/256 [00:48<00:00,  5.27it/s]
100%|██████████| 37/37 [00:02<00:00, 12.99it/s]


Epoch: 10 | Time: 0m 51s
	Train Loss: 4.696 | Train PPL: 109.560
	 Val. Loss: 5.372 |  Val. PPL: 215.362



