# Load the dataset

In [1]:
from modules.texts import Vocab, GloVeLoader
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
import modules.extractive as ext
import modules.abstractive as abs
import modules.beam_search as bs
from modules.data import Documents
from torch.utils.data import DataLoader

# Initialize the pretrained embeddings

In [2]:
import numpy as np

# Load the pretrained embedding into the memory
path_glove = os.path.join(os.path.expanduser('~'),
             'data/NLP/word_embeddings/GloVe/glove.6B.200d.txt')
glove = GloVeLoader(path_glove)

# Load the dataset
doc_file = './data/kaggle_news_rouge1.pkl'
docs = Documents(doc_file, n_samples=100, vocab_size = 30000)
vocab = docs.vocab

d = 200
emb = nn.Embedding(vocab.V, d)

def init_emb(emb, vocab):
    for word in vocab.word2id:
        try:
            emb.weight.data[vocab[word]] = torch.from_numpy(glove[word])
        except KeyError as e:
            # Case when pretrained embedding for a word does not exist
            pass
#     emb.weight.requires_grad = False # suppress updates
    print('Initialized the word embeddings.')

The pretrained vector file to use: /home/yhs/data/NLP/word_embeddings/GloVe/glove.6B.200d.txt
The number of words in the pretrained vector: 400000
The dimension of the pretrained vector: 200


In [105]:
# Test
from copy import deepcopy
from torch import optim
import time
from itertools import chain

vocab_size = vocab.V
emb_size = emb.weight.data.size(1)
n_kernels = 50
kernel_sizes = [1,2,3,4,5]
pretrained = emb
sent_size = len(kernel_sizes) * n_kernels
hidden_size = 400
num_layers = 1
n_classes = len(docs.dclass2id)
batch_size = 1
torch.manual_seed(7)
torch.cuda.manual_seed(7)

init_emb(emb, vocab)
ext_s_enc = ext.SentenceEncoder(vocab_size, emb_size,
                                   n_kernels, kernel_sizes, pretrained)
ext_d_enc = ext.DocumentEncoder(sent_size, hidden_size)
ext_extc = ext.ExtractorCell(sent_size, hidden_size)
ext_d_classifier = ext.DocumentClassifier(sent_size, n_classes)
abs_enc = abs.EncoderRNN(emb, hidden_size, num_layers)
abs_dec = abs.AttnDecoderRNN(emb, hidden_size * 2, num_layers)

models = [ext_s_enc, ext_d_enc, ext_extc, ext_d_classifier,
         abs_enc, abs_dec]
params = list(chain(*[model.parameters() for model in models]))
optimizer = optim.SGD(params, lr = .005)

loss_fn_ext = nn.BCELoss()
loss_fn_dclass = nn.NLLLoss()
loss_fn_abs = nn.CrossEntropyLoss()

def get_accuracy(probs, targets, verbose = False):   
    '''
    Calculates the accuracy for the extractor

    Args:
        probs: extraction probability
        targets: ground truth labels for extraction
    '''
    import numpy as np
    preds = np.array([1 if p > 0.5 else 0 for p in probs])
    if verbose:
        print(preds)
    accuracy = np.mean(preds == targets)
    
    return accuracy

# class RougeScorer:
#     def __init__(self):
#         from rouge import Rouge
#         self.rouge = Rouge()
#     def score(self, reference, generated, type = 1):
#         score = self.rouge.get_scores(reference, generated, avg=True)
#         score = score['rouge-%s' % type]['f']
#         return score

# rouge = RougeScorer()
    
def run_epoch(docs):
    
    epoch_loss_abs = 0
    epoch_loss_ext = 0
    epoch_loss_dclass = 0
    epoch_accuracy_ext = 0

    for doc in docs:
        optimizer.zero_grad()
        docloader = DataLoader(doc, batch_size=1, shuffle=False)
        # Encode the sentences in a document
        sents_raw = []
        sents_encoded = []
        ext_labels = []
        doc_class = Variable(torch.LongTensor([doc.doc_class])).cuda()
        for sent, ext_label in docloader:
            # only accept sentences that conforms the maximum kernel sizes
            if sent.size(1) < max(kernel_sizes):
                continue
            sent = Variable(sent).cuda()
            sents_raw.append(sent)
            sents_encoded.append(ext_s_enc(sent))
            ext_labels.append(ext_label.cuda())
        # Ignore if the content is a single sentence(no need to train)
        if len(sents_raw) <= 1:
            continue

        # Build the document representation using encoded sentences
        d_encoded = torch.cat(sents_encoded, dim = 0).unsqueeze(1)
        ext_labels = Variable(torch.cat(ext_labels, dim = 0).type(torch.FloatTensor).view(-1)).cuda()
        init_sent = ext_s_enc.init_sent(batch_size)
        d_ext = torch.cat([init_sent, d_encoded[:-1]], dim = 0)

        # Extractive Summarizer
        ## Initialize the d_encoder
        h, c = ext_d_enc.init_h0c0(batch_size)
        h0 = Variable(h.data)
        ## An input goes through the document encoder
        output, hn, cn = ext_d_enc(d_ext, h, c)
        ## Initialize the decoder
        ### calculate p0, h_bar0, c_bar0
        h_ = hn.squeeze(0)
        c_ = cn.squeeze(0)
        p = ext_extc.init_p(h0.squeeze(0), h_)
        ### calculate p_t, h_bar_t, c_bar_t
        d_encoder_hiddens = torch.cat((h0, output[:-1]), 0) #h0 ~ h_{n-1}
        extract_probs = Variable(torch.zeros(len(sents_encoded))).cuda()
        for i, (s, h) in enumerate(zip(sents_encoded, d_encoder_hiddens)):
            h_, c_, p = ext_extc(s, h, h_, c_, p)
            extract_probs[i] = p
        ## Document Classifier
        q = ext_d_classifier(extract_probs.view(-1,1), d_encoded.squeeze(1))
        
        ## Optimize over the extractive examples
        loss_ext = loss_fn_ext(extract_probs, ext_labels)
        loss_dclass = loss_fn_dclass(q.view(1,-1), doc_class)
        epoch_loss_ext += loss_ext.data.cpu().numpy()[0]
        epoch_loss_dclass += loss_dclass.data.cpu().numpy()[0]
        torch.autograd.backward([loss_ext, loss_dclass])
        optimizer.step()
        
        ## Measure the accuracy
        p_cpu = extract_probs.data.cpu().numpy()
        t_cpu = ext_labels.data.cpu().numpy()
        q_cpu = q.data.cpu().numpy()
        c_cpu = doc_class.data.cpu().numpy()
        epoch_accuracy_ext += get_accuracy(p_cpu, t_cpu)

        # Abstractive Summarizer
        optimizer.zero_grad()
        loss_abs = 0
        ## Run through the encoder
#         words = torch.cat(sents_ext, dim=1).t()
        sents_ext = [sent for i,sent in enumerate(sents_raw)
                     if extract_probs[i].data.cpu().numpy() > 0.5]
        
        # skip if no sentences are selected as summaries
        if len(sents_ext) == 0:
            continue
        words = torch.cat(sents_ext, dim=1).t()

        abs_enc_hidden = abs_enc.init_hidden(batch_size)
        abs_enc_output, abs_enc_hidden = abs_enc(words, abs_enc_hidden)
        ## Remove to too long documents to tackle memory overflow
        if len(abs_enc_output) > 6000:
            continue
        ## Run through the decoder
        abs_dec_hidden = abs_dec.init_hidden(batch_size)
        for target in doc.head:
            target = Variable(torch.LongTensor([target]).unsqueeze(1)).cuda()
            abs_dec_output, abs_dec_hidden, attn_weights = abs_dec(target, abs_dec_hidden, abs_enc_output)
            loss_abs += loss_fn_abs(abs_dec_output, target.squeeze(1))

        epoch_loss_abs += loss_abs.data.cpu().numpy()[0]
        loss_abs.backward()
        optimizer.step()

    acc_ext = epoch_accuracy_ext / len(docs)
    
    return epoch_loss_ext, epoch_loss_dclass, epoch_loss_abs, acc_ext

def train(docs, n_epochs = 10, print_every = 1):
    import time
    
    for epoch in range(n_epochs):
        start_time = time.time()
        ext_loss, dclass_loss, abs_loss, ext_acc = run_epoch(docs)
        end_time = time.time()
        wall_clock = (end_time - start_time) / 60
        if epoch % print_every == 0:
            print('Epoch:%2i / Loss:(%.3f/%.3f/%.3f) / Accuracy:(%.3f) / TrainingTime:%.3f(min)' %
                  (epoch, ext_loss, dclass_loss, abs_loss, ext_acc, wall_clock))

import os
from os.path import join            
# Training
train(docs, n_epochs = 50, print_every = 1)
# for n in range(5):
#     train(docs, n_epochs = 10, print_every = 1)
#     print('Epoch %2i finished.' % ((n+1)*10))
#     model_dict = dict()
#     model_dict['emb'] = emb
#     model_dict['ext_s_enc'] = ext_s_enc
#     model_dict['ext_d_enc'] = ext_d_enc
#     model_dict['ext_extc'] = ext_extc
#     model_dict['ext_d_classifier'] = ext_d_classifier
#     model_dict['abs_enc'] = abs_dec
#     model_dict['abs_dec'] = abs_dec

#     data_dir = join(os.path.expanduser('~'), 'cs671-large')
#     for name, model in model_dict.items():
#         torch.save(model.state_dict(), join(data_dir, name + '_epoch_%2i' % ((n+1)*10)) )

Initialized the word embeddings.
Epoch: 0 / Loss:(69.154/171.778/6974.253) / Accuracy:(0.507) / TrainingTime:0.137(min)
Epoch: 1 / Loss:(68.526/165.057/7867.030) / Accuracy:(0.595) / TrainingTime:0.176(min)
Epoch: 2 / Loss:(68.147/160.722/7178.985) / Accuracy:(0.595) / TrainingTime:0.177(min)
Epoch: 3 / Loss:(67.833/156.720/6649.148) / Accuracy:(0.595) / TrainingTime:0.176(min)
Epoch: 4 / Loss:(67.557/152.730/6201.312) / Accuracy:(0.595) / TrainingTime:0.177(min)
Epoch: 5 / Loss:(67.305/148.548/5821.301) / Accuracy:(0.595) / TrainingTime:0.176(min)
Epoch: 6 / Loss:(67.073/144.021/5469.786) / Accuracy:(0.595) / TrainingTime:0.177(min)
Epoch: 7 / Loss:(66.858/139.105/5119.098) / Accuracy:(0.595) / TrainingTime:0.176(min)
Epoch: 8 / Loss:(66.651/133.773/4763.871) / Accuracy:(0.595) / TrainingTime:0.177(min)
Epoch: 9 / Loss:(66.453/128.054/4403.646) / Accuracy:(0.595) / TrainingTime:0.176(min)
Epoch:10 / Loss:(66.262/121.971/4037.107) / Accuracy:(0.595) / TrainingTime:0.177(min)
Epoch:11 /

In [107]:
import torch
import torch.nn as nn
from torch.autograd import Variable
import modules.texts as texts
import torch.nn.functional as F

class BeamTree : 
    def __init__(self, log_prob, word_idx, parent_node = None) : 
        self.log_prob = log_prob
        self.word_idx = word_idx
        self.parent_node = parent_node
        # log probs from the root (all log_probs added)
        if parent_node is None : self.total_log_prob = 0
        else : self.total_log_prob = parent_node.log_prob + log_prob
        # whether this node is EOS
        self.is_done = False

    def __repr__(self) : 
        return '[%d, %.2f, %.2f, %s]' % (self.word_idx, self.log_prob, self.total_log_prob, self.is_done)
    
# select topk(beam_size) nodes for each batch
def select_topk(beam_size, tmp_beam_batch):
    beam_batch = []
    for tmp_beam in tmp_beam_batch:
        beam = []
        # firstly select previously EOS node
        for node in tmp_beam: 
            if node.is_done: beam.append(node)
        
        # remove selected node from tmp_beam_batch
        for node in beam: tmp_beam.remove(node)

        # sort by total log probability
        sorted_tmp_beam = sorted(tmp_beam, key = lambda BeamTree : -BeamTree.total_log_prob)
        for beam_idx in range(beam_size - len(beam)) : 
            beam.append(sorted_tmp_beam[beam_idx])
            if sorted_tmp_beam[beam_idx].word_idx == texts.EOS_token : sorted_tmp_beam[beam_idx].is_done = True
        beam_batch.append(beam)
    
    return beam_batch

def check_EOS(beam_batch):
    '''
    Checks if all nodes in the beam_batch are EOS
    '''
    for beam in beam_batch:
        for node in beam:
            if not node.is_done: return False
    
    return True

def beam2seq(beam_batch):
    seqs_batch = []
    top1_batch = []
    for beam in beam_batch:
        # sort each beam
        sorted_beam = sorted(beam, key = lambda BeamTree: -BeamTree.total_log_prob)
        # get strings from node
        seqs = []
        for node in sorted_beam:
            seqs.append([node2seq(node), node.total_log_prob])
        seqs_batch.append(seqs)
        top1_batch.append(seqs[0])
        
    return top1_batch, seqs_batch

def node2seq(leaf):
    '''
    Get a 
    '''
    seq = []
    cur = leaf
    while True:
        seq.append(cur.word_idx)
        cur = cur.parent_node
        if cur.word_idx == texts.SOS_token: break
    seq.reverse()
    
    return seq

In [113]:
def generate_title(doc_sents, beam_size, max_kernel_size, models, max_target_length = 100, batch_size = 1):
    '''
    Args:
        doc_sents: list of torch.LongTensors, where each elements can
        have variable length.
        beam_size (int)
        models (list): encoders and decoders
        max_kernel_size (int): maximum kernel size of the CNN sentence encoder
        max_target_length (int): maximum length that a sentence can have.
    '''
    assert len(models) == 6
    
    ext_s_enc = models[0]
    ext_d_enc = models[1]
    ext_extc = models[2]
    ext_d_classifier = models[3]
    abs_enc = models[4]
    abs_dec = models[5]
    
    # Encode the sentences in a document
    if len(doc_sents) <= 1:
        print('Error: The length of the document is %i.' % 1)
        return
    
    sents_raw = []
    sents_encoded = []
    for sent in doc_sents:
        if sent.size(1) < max_kernel_size:
            continue
        sent = Variable(sent).cuda()
        sents_raw.append(sent)
        sents_encoded.append(ext_s_enc(sent))
    
    # Build the document representation using encoded sentences
    d_encoded = torch.cat(sents_encoded, dim = 0).unsqueeze(1)
    init_sent = ext_s_enc.init_sent(batch_size)
    d_ext = torch.cat([init_sent, d_encoded[:-1]], dim = 0)
    
    # Extractive Summarizer
    ## Initialize the d_encoder
    h, c = ext_d_enc.init_h0c0(batch_size)
    h0 = Variable(h.data)
    ## An input goes through the document encoder
    output, hn, cn = ext_d_enc(d_ext, h, c)
    ## Initialize the decoder
    ### calculate p0, h_bar0, c_bar0
    h_ = hn.squeeze(0)
    c_ = cn.squeeze(0)
    p = ext_extc.init_p(h0.squeeze(0), h_)
    ### calculate p_t, h_bar_t, c_bar_t
    d_encoder_hiddens = torch.cat((h0, output[:-1]), 0) #h0 ~ h_{n-1}
    extract_probs = Variable(torch.zeros(len(sents_encoded))).cuda()
    for i, (s, h) in enumerate(zip(sents_encoded, d_encoder_hiddens)):
        h_, c_, p = ext_extc(s, h, h_, c_, p)
        extract_probs[i] = p.squeeze(0)
    ## Document Classifier
    q = ext_d_classifier(extract_probs.view(-1,1), d_encoded.squeeze(1))
    
    # Abstractive Summarizer
    sents_ext = [sent for i,sent in enumerate(sents_raw)
                 if extract_probs[i].data[0] > 0.5]
#     print(sents_ext)
    ## skip if no sentences are selected as summaries
    if len(sents_ext) == 0:
        print("No sentences are selected")
        return
    words = torch.cat(sents_ext, dim=1).t()
    print(words)
    abs_enc_hidden = abs_enc.init_hidden(batch_size)
    abs_enc_output, abs_enc_hidden = abs_enc(words, abs_enc_hidden)
    ## Remove to too long documents to tackle memory overflow
    if len(abs_enc_output) > 6000:
        print('Out of memory')
        return
    abs_dec_hidden = abs_dec.init_hidden(batch_size)
    abs_dec_input = Variable(torch.LongTensor([texts.SOS_token]).unsqueeze(1)).cuda()
    
    beam_batch = [[] for i in range(batch_size)]
    
    for t in range(max_target_length):
        if t == 0:
            print(abs_dec_input)
            abs_dec_output, abs_dec_hidden, _ = abs_dec(abs_dec_input, abs_dec_hidden, abs_enc_output)
            # (B = 1, V = vocab_size)
            abs_dec_prob = F.log_softmax(abs_dec_output)
            # print(abs_dec_prob.size())
            # Get top-k(beam size) values
            top_values, top_idxs = abs_dec_prob.data.topk(beam_size, dim = -1)
#             print(top_values)
            print(top_idxs)
            for batch_idx in range(batch_size):
                log_prob = 0 # p = 1
                root = BeamTree(log_prob, texts.SOS_token)
                for beam_idx in range(beam_size):
                    log_prob = top_values[batch_idx][beam_idx]
                    word_idx = top_idxs[batch_idx][beam_idx]
                    beam_batch[batch_idx].append(BeamTree(log_prob, word_idx, root))
            print(beam_batch)
        else:
            tmp_beam_batch = [[] for i in range(batch_size)]
            for beam_idx in range(beam_size): 
                abs_dec_input = []
                for batch_idx in range(batch_size): 
                    # decoder inputs are words in current beam
                    abs_dec_input.append(beam_batch[batch_idx][beam_idx].word_idx)
                
                # Regard each beams as seperate batches
                # (1, beam_size)
                abs_dec_input = Variable(torch.LongTensor(abs_dec_input).view(1,-1)).cuda()
                abs_dec_output, abs_dec_hidden, _ = abs_dec(abs_dec_input, abs_dec_hidden, abs_enc_output)
                # (B, V)
                
                abs_dec_prob = F.log_softmax(abs_dec_output)

                # get top k(beam size) values
                top_values, top_idxs = abs_dec_prob.data.topk(beam_size, dim = -1)
#                 print(top_idxs)
                for batch_idx in range(batch_size):
                    for tmp_beam_idx in range(beam_size): 
                        # if current word is EOS, add it to tmp_beam_batch instead of its children
                        if beam_batch[batch_idx][beam_idx].word_idx == texts.EOS_token: 
                            tmp_beam_batch[batch_idx].append(beam_batch[batch_idx][beam_idx])
                            break
                        log_prob = top_values[batch_idx][tmp_beam_idx]
                        word_idx = top_idxs[batch_idx][tmp_beam_idx]
                        tmp_beam_batch[batch_idx].append(BeamTree(log_prob, word_idx, beam_batch[batch_idx][beam_idx]))

            # get the new beam
            beam_batch = select_topk(beam_size, tmp_beam_batch)
            # check if all nodes in the beam_batch are EOS
            if check_EOS(beam_batch): break
    
    top1_batch, seqs_batch = beam2seq(beam_batch)
    
    return top1_batch, seqs_batch

from rouge import Rouge

doc_i = 0
models = [ext_s_enc, ext_d_enc, ext_extc, ext_d_classifier, abs_enc, abs_dec]
test_input = [torch.LongTensor(sent).view(1,-1) for sent in docs[doc_i].sents]
ref_input = torch.LongTensor(docs[doc_i].head).view(1,-1)
top1_batch, seqs_batch = generate_title(doc_sents = test_input,
                                           beam_size = 5,
                                           models = models,
                                           max_kernel_size = 5)

print('Lengths of the generated %s' % str(list(map(len, seqs_batch))))

rouge = Rouge()
total_rouge = 0
for i in range(len(top1_batch)):
    generated = vocab.id2sents([top1_batch[i][0]])
    reference = vocab.id2sents([ref_input[i]])
    print(generated)
    print(reference)
    total_rouge += rouge.get_scores(generated, reference)[0]['rouge-1']['f']

print(total_rouge / len(top1_batch))
print(seqs_batch)

Variable containing:
    1
 6200
 1800
  629
 2079
  458
 4402
 6717
 6788
  335
 1391
 6199
  750
 6797
 5880
 6282
 6259
 5027
 4402
 3840
 1464
  495
 6200
 4433
 6382
  335
  857
 2693
 2264
  629
 6689
 5340
  670
 4402
 5773
 3933
   46
 6200
 6478
 6182
  315
 5421
  458
 6689
 2646
 6282
 5300
 6789
  191
 3078
 4376
 3360
 6200
 1391
 6199
 3802
 3361
 1543
 2643
 3365
 5880
 6282
 1282
 5028
  774
 6809
   46
    2
    1
  315
 3361
 2945
  951
 1842
 6282
 1282
 6200
 2541
 4376
 5028
 4402
  801
  274
   46
    2
    1
 3194
 6229
 1579
   40
  563
 4387
   61
 1911
 5628
 5191
 4412
  629
 1282
 6200
 2541
 1469
  774
  335
 6025
 6267
 6743
  563
 6200
 3604
 5880
 5628
 6259
 5027
 6282
 6206
 1464
   42
    2
    1
 6706
 5671
 3824
 6200
 4876
 4376
  335
 6809
  315
    2
    1
  619
 4388
 6289
 3022
 6268
 2182
 3194
 6200
 1817
   46
    2
    1
 5639
 5146
 6282
  933
 3138
   46
 6200
 4324
 6689
 3358
 4402
 1800
  629
 2079
  460
  629
 2661
 2877
 3048
 4005
 

In [60]:
from rouge import Rouge

doc_i = 10
models = [ext_s_enc, ext_d_enc, ext_extc, ext_d_classifier, abs_enc, abs_dec]
test_input = [torch.LongTensor(sent).view(1,-1) for sent in docs[doc_i].sents]
ref_input = torch.LongTensor(docs[doc_i].head).view(1,-1)
top1_batch, seqs_batch = generate_title(doc_sents = test_input,
                                           beam_size = 5,
                                           models = models,
                                           max_kernel_size = 5)

print('Lengths of the generated %s' % str(list(map(len, seqs_batch))))

rouge = Rouge()
total_rouge = 0
for i in range(len(top1_batch)):
    generated = vocab.id2sents([top1_batch[i][0]])
    reference = vocab.id2sents([ref_input[i]])
    print(reference)
    total_rouge += rouge.get_scores(generated, reference)[0]['rouge-1']['f']

print(total_rouge / len(top1_batch))
print(seqs_batch)

Lengths of the generated [5]
_BEGIN_ food regulator planning leftover banks to feed hungry people _END_
0.16666666513888892
[[[[1], -0.00043487548828125], [[1], -0.00043487548828125], [[1], -0.00043487548828125], [[1], -0.00043487548828125], [[2162], -10.48104190826416]]]


In [46]:
# # Test
# from copy import deepcopy
# from torch import optim
# import time
# from itertools import chain

# vocab_size = vocab.V
# emb_size = emb.weight.data.size(1)
# n_kernels = 50
# kernel_sizes = [1,2,3,4,5]
# pretrained = emb
# sent_size = len(kernel_sizes) * n_kernels
# hidden_size = 400
# num_layers = 1
# n_classes = len(docs.dclass2id)
# batch_size = 1
# torch.manual_seed(7)
# torch.cuda.manual_seed(7)

# init_emb(emb, vocab)
# ext_s_enc = ext.SentenceEncoder(vocab_size, emb_size,
#                                    n_kernels, kernel_sizes, pretrained)
# ext_d_enc = ext.DocumentEncoder(sent_size, hidden_size)
# ext_extc = ext.ExtractorCell(sent_size, hid\den_size)
# ext_d_classifier = ext.DocumentClassifier(sent_size, n_classes)
# abs_enc = abs.EncoderRNN(emb, hidden_size, num_layers)
# abs_dec = abs.AttnDecoderRNN(emb, hidden_size * 2, num_layers)

# models = [ext_s_enc, ext_d_enc, ext_extc, ext_d_classifier,
#          abs_enc, abs_dec]
# params = list(chain(*[model.parameters() for model in models]))
# optimizer = optim.SGD(params, lr = .005)

# loss_fn_ext = nn.BCELoss()
# loss_fn_dclass = nn.NLLLoss()
# loss_fn_abs = nn.CrossEntropyLoss()

# def get_accuracy(probs, targets, verbose = False):   
#     '''
#     Calculates the accuracy for the extractor

#     Args:
#         probs: extraction probability
#         targets: ground truth labels for extraction
#     '''
#     import numpy as np
#     preds = probs > .5
#     if verbose:
#         print(preds)
#     matches = preds.type(torch.cuda.FloatTensor) == targets
#     accuracy = torch.mean(matches.type(torch.cuda.FloatTensor))
    
#     return accuracy

    
# def run_epoch(docs):
    
#     epoch_loss_abs = 0
#     epoch_loss_ext = 0
#     epoch_loss_dclass = 0
#     epoch_accuracy_ext = 0

#     for doc in docs:
#         optimizer.zero_grad()
#         docloader = DataLoader(doc, batch_size=1, shuffle=False)
#         # Encode the sentences in a document
#         sents_raw = []
#         sents_encoded = []
#         ext_labels = []
#         doc_class = Variable(torch.LongTensor([doc.doc_class])).cuda()
#         for sent, ext_label in docloader:
#             # only accept sentences that conforms the maximum kernel sizes
#             if sent.size(1) < max(kernel_sizes):
#                 continue
#             sent = Variable(sent).cuda()
#             sents_raw.append(sent)
#             sents_encoded.append(ext_s_enc(sent))
#             ext_labels.append(ext_label.cuda())
#         # Ignore if the content is a single sentence(no need to train)
#         if len(sents_raw) <= 1:
#             continue

#         # Build the document representation using encoded sentences
#         d_encoded = torch.cat(sents_encoded, dim = 0).unsqueeze(1)
#         ext_labels = Variable(torch.cat(ext_labels, dim = 0).type(torch.FloatTensor).view(-1)).cuda()
#         init_sent = ext_s_enc.init_sent(batch_size)
#         d_ext = torch.cat([init_sent, d_encoded[:-1]], dim = 0)

#         # Extractive Summarizer
#         ## Initialize the d_encoder
#         h, c = ext_d_enc.init_h0c0(batch_size)
#         h0 = Variable(h.data)
#         ## An input goes through the document encoder
#         output, hn, cn = ext_d_enc(d_ext, h, c)
#         ## Initialize the decoder
#         ### calculate p0, h_bar0, c_bar0
#         h_ = hn.squeeze(0)
#         c_ = cn.squeeze(0)
#         p = ext_extc.init_p(h0.squeeze(0), h_)
#         ### calculate p_t, h_bar_t, c_bar_t
#         d_encoder_hiddens = torch.cat((h0, output[:-1]), 0) #h0 ~ h_{n-1}
#         extract_probs = Variable(torch.zeros(len(sents_encoded))).cuda()
#         for i, (s, h) in enumerate(zip(sents_encoded, d_encoder_hiddens)):
#             h_, c_, p = ext_extc(s, h, h_, c_, p)
#             extract_probs[i] = p
#         ## Document Classifier
#         q = ext_d_classifier(extract_probs.view(-1,1), d_encoded.squeeze(1))
        
#         ## Optimize over the extractive examples
#         loss_ext = loss_fn_ext(extract_probs, ext_labels)
#         loss_dclass = loss_fn_dclass(q.view(1,-1), doc_class)
#         epoch_loss_ext += loss_ext.data
#         epoch_loss_dclass += loss_dclass.data
#         torch.autograd.backward([loss_ext, loss_dclass])
#         optimizer.step()
        
#         ## Measure the accuracy
# #         q_cpu = q.data
# #         c_cpu = doc_class.data
#         epoch_accuracy_ext += get_accuracy(extract_probs.data, ext_labels.data)

#         # Abstractive Summarizer
#         optimizer.zero_grad()
#         loss_abs = 0
#         ## Run through the encoder
# #         words = torch.cat(sents_ext, dim=1).t()
#         sents_ext = [sent for i,sent in enumerate(sents_raw)
#                      if extract_probs[i].data[0] > 0.5]
        
#         # skip if no sentences are selected as summaries
#         if len(sents_ext) == 0:
#             continue
#         words = torch.cat(sents_ext, dim=1).t()

#         abs_enc_hidden = abs_enc.init_hidden(batch_size)
#         abs_enc_output, abs_enc_hidden = abs_enc(words, abs_enc_hidden)
#         ## Remove to too long documents to tackle memory overflow
#         if len(abs_enc_output) > 6000:
#             continue
#         ## Run through the decoder
#         abs_dec_hidden = abs_dec.init_hidden(batch_size)
#         for target in doc.head:
#             target = Variable(torch.LongTensor([target]).unsqueeze(1)).cuda()
#             abs_dec_output, abs_dec_hidden, attn_weights = abs_dec(target, abs_dec_hidden, abs_enc_output)
#             loss_abs += loss_fn_abs(abs_dec_output, target.squeeze(1))

#         epoch_loss_abs += loss_abs.data
#         loss_abs.backward()
#         optimizer.step()

#     acc_ext = epoch_accuracy_ext / len(docs)
    
#     return epoch_loss_ext, epoch_loss_dclass, epoch_loss_abs, acc_ext

# def train(docs, n_epochs = 10, print_every = 1):
#     import time
    
#     for epoch in range(n_epochs):
#         start_time = time.time()
#         ext_loss, dclass_loss, abs_loss, ext_acc = run_epoch(docs)
#         ext_loss = ext_loss.cpu().numpy()[0]
#         dclass_loss = dclass_loss.cpu().numpy()[0]
#         abs_loss = abs_loss.cpu().numpy()[0]
#         end_time = time.time()
#         wall_clock = (end_time - start_time) / 60
#         if epoch % print_every == 0:
#             print('Epoch: %i',epoch)
#             print('Extractive Loss: ',ext_loss)
#             print('Classification Loss: ',dclass_loss)
#             print('Abstractive Loss: ',abs_loss)
#             print('Extractive Accuracy: ', ext_acc)
#             print('Training Time: %.3f(min)' % wall_clock)

# import os
# from os.path import join            
# # Training
# for n in range(5):
#     train(docs, n_epochs = 1, print_every = 1)
#     print('Epoch %2i finished.' % ((n+1)*10))
# #     model_dict = dict()
# #     model_dict['emb'] = emb
# #     model_dict['ext_s_enc'] = ext_s_enc
# #     model_dict['ext_d_enc'] = ext_d_enc
# #     model_dict['ext_extc'] = ext_extc
# #     model_dict['ext_d_classifier'] = ext_d_classifier
# #     model_dict['abs_enc'] = abs_enc
# #     model_dict['abs_dec'] = abs_dec

# #     data_dir = join(os.path.expanduser('~'), 'cs671-large')
# #     for name, model in model_dict.items():
# #         torch.save(model.state_dict(), join(data_dir, name + '_epoch_%2i' % ((n+1)*10)) )

Initialized the word embeddings.


KeyboardInterrupt: 