In [1]:
from modules.texts import Vocab, GloVeLoader
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from modules.layers import SentenceEncoder, DocumentEncoder, ExtractorCell, DocumentClassifier
from modules.data import Documents
from torch.utils.data import DataLoader

In [None]:
import numpy as np

# Load the pretrained embedding into the memory
path_glove = os.path.join(os.path.expanduser('~'),
             'data/NLP/word_embeddings/GloVe/glove.6B.200d.txt')
glove = GloVeLoader(path_glove)

# Load the dataset
doc_file = './data/kaggle_news_rouge1.pkl'
docs = Documents(doc_file, vocab_size = 30000)
docs.set_doc_classes(np.random.randint(2, size = len(docs)).tolist()) # attach random document labels
vocab = docs.vocab

d = 200
emb = nn.Embedding(vocab.V, d)

def init_emb(emb, vocab):
    for word in vocab.word2id:
        try:
            emb.weight.data[vocab[word]] = torch.from_numpy(glove[word])
        except KeyError as e:
            # Case when pretrained embedding for a word does not exist
            pass
    # emb.weight.requires_grad = False # suppress updates
    print('Initialized the word embeddings.')

init_emb(emb, vocab)

The pretrained vector file to use: /home/yhs/data/NLP/word_embeddings/GloVe/glove.6B.200d.txt
The number of words in the pretrained vector: 400000
The dimension of the pretrained vector: 200


`backward()` for multi-task learning
- Use the following syntax:
```python
import torch
torch.autograd.backward([loss1, loss2])
```
- See [1](https://discuss.pytorch.org/t/how-to-do-backward-for-a-net-with-multiple-outputs/87), [2](https://discuss.pytorch.org/t/how-does-autograd-handle-multiple-objectives/341/2)

In [3]:
vocab_size = vocab.V
emb_size = emb.weight.data.size(1)
n_kernels = 50
kernel_sizes = [1,2,3,4,5]
pretrained = emb
sent_size = len(kernel_sizes) * n_kernels
hidden_size = 400
batch_size = 1
n_classes = 2

torch.manual_seed(7) # fix the cpu seed for reproducibility
torch.cuda.manual_seed(7) # fix the gpu seed for reproducibility

####WARNING: No mini-batch processing#########
s_encoder = SentenceEncoder(vocab_size,
                            emb_size,
                            n_kernels,
                            kernel_sizes,
                            pretrained)
d_encoder = DocumentEncoder(sent_size, hidden_size)
ext_cell = ExtractorCell(sent_size, hidden_size)
d_classifier = DocumentClassifier(sent_size, n_classes)

loss_fn_ext = nn.BCELoss() # Extractive loss
loss_fn_dclass = nn.NLLLoss() # Document classification loss

params = list(s_encoder.parameters()) + list(d_encoder.parameters()) +\
list(ext_cell.parameters()) + list(d_classifier.parameters())
optimizer = optim.Adam(params, lr = .005)

def get_accuracy(probs, targets, verbose = False):   
    '''
    Calculates the accuracy for the predictions

    Args:
        probs: extraction probability
        targets: ground truth labels for extraction
    '''
    import numpy as np
    preds = np.array([1 if p > 0.5 else 0 for p in probs])
    if verbose:
        print(preds)
    accuracy = np.mean(preds == targets)
    
    return accuracy

def run_epoch(docs):
    
    epoch_loss_ext = 0
    epoch_loss_dclass = 0
    epoch_accuracy_ext = 0
    epoch_accuracy_dclass = 0
    
    for doc in docs:
        docloader = DataLoader(doc, batch_size=1, shuffle=False)
        # Encode the sentences in a document
        inputs = []
        targets = []
        doc_class = Variable(torch.LongTensor([doc.doc_class])).cuda()
        for input_raw, target_raw in docloader:
            # only accept sentences that conforms the maximum kernel sizes
            if input_raw.size(1) < max(kernel_sizes):
                continue
            input_raw = Variable(input_raw).cuda()
            inputs.append(s_encoder(input_raw))
            targets.append(target_raw.cuda())
        # Ignore if the content is a single sentence(no need to train)
        if len(inputs) <= 1:
            continue
        
        # Build the document representation using encoded sentences
        d_encoded = torch.cat(inputs, dim = 0)
        targets = Variable(torch.cat(targets, dim = 0).type(torch.FloatTensor).view(-1)).cuda()
        #### WARNING: "BEGINNING OF THE SENTENCE" embedding was initialized to zero ####
        init_sent = Variable(torch.zeros(1, d_encoded.size(1))).cuda()
        d_final = torch.cat([init_sent, d_encoded[:-1]], dim = 0)
        d_final = d_final.view(d_final.size(0),1,d_final.size(1))

        # Initialize the d_encoder
        h, c = d_encoder.init_h0c0(batch_size)
        h0 = Variable(h.data)

        # An input goes through the d_encoder
        output, hn, cn = d_encoder(d_final, h, c)

        # Initialize the decoder
        ## calculate p0, h_bar0, c_bar0
        h_ = hn.squeeze(0)
        c_ = cn.squeeze(0)
        p = ext_cell.init_p(h0.squeeze(0), h_)

        ## calculate p_t, h_bar_t, c_bar_t
        d_encoder_hiddens = torch.cat((h0, output[:-1]), 0) #h0 ~ h_{n-1}
        extract_probs = Variable(torch.zeros(len(inputs))).cuda()
        for i, (s, h) in enumerate(zip(inputs, d_encoder_hiddens)):
            h_, c_, p = ext_cell(s, h, h_, c_, p)
            extract_probs[i] = p

        ## Document Classifier
        q = d_classifier(extract_probs.view(-1,1), d_encoded)
        
        # Define the loss functions
        optimizer.zero_grad() # flush the gradients
        loss_ext = loss_fn_ext(extract_probs, targets)
        loss_dclass = loss_fn_dclass(q.view(1,-1), doc_class)
        epoch_loss_ext += loss_ext.data.cpu().numpy()[0]
        epoch_loss_dclass += loss_dclass.data.cpu().numpy()[0]
        
        # backward() with Multiple objectives
        torch.autograd.backward([loss_ext, loss_dclass])
        optimizer.step()
        
        # Measure the accuracy
        p_cpu = extract_probs.data.cpu().numpy()
        t_cpu = targets.data.cpu().numpy()
        q_cpu = q.data.cpu().numpy()
        c_cpu = doc_class.data.cpu().numpy()
        epoch_accuracy_ext += get_accuracy(p_cpu, t_cpu)
        epoch_accuracy_dclass += get_accuracy(q_cpu, c_cpu)
        
    acc_ext = epoch_accuracy_ext / len(docs)
    acc_dclass = epoch_accuracy_dclass / len(docs)
    
    return epoch_loss_ext, epoch_loss_dclass, acc_ext, acc_dclass

def train(docs, n_epochs = 10, print_every = 1):
    import time
    
    for epoch in range(n_epochs):
        start_time = time.time()
        ext_loss, dclass_loss, ext_acc, dclass_acc = run_epoch(docs)
        end_time = time.time()
        wall_clock = (end_time - start_time) / 60
        if epoch % print_every == 0:
            print('Epoch:%2i / Loss:(%.3f/%.3f) / Accuracy:(%.3f/%.3f) / TrainingTime:%.3f(min)' %
                  (epoch, ext_loss, dclass_loss, ext_acc, dclass_acc, wall_clock))
        
# Initial Training
train(docs, n_epochs = 20, print_every = 1)

Epoch: 0 / Loss:(2808.866/-4252.482) / Accuracy:(0.583/0.493) / TrainingTime:3.075(min)


KeyboardInterrupt: 

In [None]:
# # Store the parameters
# torch.save(s_encoder.state_dict(), './data/params_s_encoder')
# torch.save(d_encoder.state_dict(), './data/params_d_encoder')
# torch.save(ext_cell.state_dict(), './data/params_ext_cell')
# torch.save(d_classifier.state_dict(), './data/params_ext_cell')