# Load documents (variable-length)

In [2]:
from custom_utils.preprocess import Vocab
# Read the whole corpus file into memory as a single string.
with open('./data/dl_history.txt') as f:
    text = f.read()

# Build the project's Vocab helper over the raw text.
# NOTE(review): top_k presumably caps the vocabulary size — confirm in
# custom_utils.preprocess.
vocab = Vocab(text, top_k = 50)

# vocab.V is reused below as the model's input_size.
print(vocab.V)

# Convert the text with Vocab.sents2id; the result is indexed and iterated
# below, one entry per element (presumably one list of ids per sentence).
sents = vocab.sents2id(text)
# Debugging aids (round trips through the Vocab helpers), left disabled:
# print(sents)
# print(vocab.id2sents(sents))
# print(vocab[0])
# print(vocab[vocab[0]])

# Sanity check: encode only the first entry with Vocab.sent2onehot.
onehot = vocab.sent2onehot(sents[0])
# print(onehot.size())
# print(vocab.onehot2sent(onehot))

import numpy as np

# Training inputs: every entry of `sents` encoded with Vocab.sent2onehot.
input_docs = [vocab.sent2onehot(ids) for ids in sents]

# Placeholder targets: len(doc) random 0/1 labels per input, kept as plain
# Python lists. The seed is fixed so the labels are reproducible.
np.random.seed(0)
target_docs = [
    np.random.randint(2, size=len(doc)).tolist() for doc in input_docs
]

50


# Train

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from modules.layers import DocumentEncoder, ExtractorCell

# Hyper-parameters. The input width follows the vocabulary size reported by
# the Vocab helper.
input_size = vocab.V
hidden_size = 100
batch_size = 1

# WARNING: no mini-batch processing — batch_size is fixed to 1 above.

# Project-defined modules from modules.layers; both are constructed from the
# same (input_size, hidden_size) pair.
encoder = DocumentEncoder(input_size, hidden_size)
extc = ExtractorCell(input_size, hidden_size)

# Binary cross-entropy loss
loss_fn = nn.BCELoss()
# A single Adam optimizer updates the parameters of both modules.
params = list(encoder.parameters()) + list(extc.parameters())
optimizer = optim.Adam(params, lr = .005)

def run_epoch(input_docs, target_docs):
    """Train the encoder and extractor for one pass over the dataset.

    Documents are processed one at a time (no mini-batching). Each document
    is run through ``encoder``, then ``extc`` is stepped once per row to
    produce a value ``p``; the collected values are scored against the
    targets with ``loss_fn`` and a single optimizer step is taken.

    Relies on the module-level ``encoder``, ``extc``, ``loss_fn``,
    ``optimizer`` and ``batch_size``, and moves its tensors to the GPU with
    ``.cuda()``.

    Args:
        input_docs: Iterable of tensors read through ``size(0)`` and
            ``size(1)``; presumably (sentences, features) — TODO confirm.
        target_docs: Iterable of label sequences, aligned with
            ``input_docs``, each convertible by ``torch.FloatTensor``.

    Returns:
        The sum of the per-document losses (the integer 0 when there are
        no documents).
    """
    total = 0

    for doc, labels in zip(input_docs, target_docs):
        # Drop gradients left over from the previous document.
        optimizer.zero_grad()

        n_rows, n_cols = doc.size(0), doc.size(1)
        # Insert a singleton middle axis: (n_rows, n_cols) -> (n_rows, 1, n_cols).
        rows = Variable(doc).view(n_rows, 1, n_cols).cuda()
        gold = Variable(torch.FloatTensor(labels)).cuda()

        # Fresh encoder state; h0 is a separate Variable wrapping the same
        # initial hidden data.
        h_init, c_init = encoder.init_h0c0(batch_size)
        h0 = Variable(h_init.data)

        # Encode the whole document.
        enc_out, h_last, c_last = encoder(rows, h_init, c_init)

        # Seed the extractor (p0, h_bar0, c_bar0) from the encoder's final
        # states.
        h_bar = h_last.squeeze(0)
        c_bar = c_last.squeeze(0)
        p = extc.init_p(h0.squeeze(0), h_bar)

        # Row t is paired with the encoder hidden state of step t-1,
        # i.e. h0 .. h_{n-1}.
        prev_hiddens = torch.cat((h0, enc_out[:-1]), 0)
        probs = Variable(torch.zeros(n_rows)).cuda()
        for t, (row, h_prev) in enumerate(zip(rows, prev_hiddens)):
            h_bar, c_bar, p = extc(row, h_prev, h_bar, c_bar, p)
            probs[t] = p

        loss = loss_fn(probs, gold)
        total += loss.data.cpu().numpy()
        loss.backward()
        optimizer.step()

    return total

def train(input_docs, target_docs, n_epochs=100, print_every=10):
    """Call ``run_epoch`` ``n_epochs`` times and periodically print the loss.

    Args:
        input_docs: Input documents, passed unchanged to ``run_epoch``.
        target_docs: Targets aligned with ``input_docs``, passed unchanged
            to ``run_epoch``.
        n_epochs: Number of passes over the dataset.
        print_every: Print the epoch loss whenever the epoch index is a
            multiple of this value (epoch 0 included). A falsy value
            (0 or None) disables printing instead of raising
            ZeroDivisionError.

    Returns:
        None. Progress is reported on stdout only.
    """
    for epoch in range(n_epochs):
        epoch_loss = run_epoch(input_docs, target_docs)
        # Guard first so print_every=0 means "silent" rather than a
        # modulo-by-zero crash.
        if print_every and epoch % print_every == 0:
            # %-formatting is kept on purpose: the loss may arrive as a
            # NumPy value, which '%.7f' accepts.
            print('Epoch: %2i / Loss: %.7f' % (epoch, epoch_loss))
        
# Initial training run: 100 epochs over the dataset built above, printing the
# summed loss every 10th epoch.
train(input_docs, target_docs, n_epochs = 100, print_every = 10)

Epoch:  0 / Loss: 4.1718683
Epoch: 10 / Loss: 2.2915561
Epoch: 20 / Loss: 2.2757397
Epoch: 30 / Loss: 0.5868411
Epoch: 40 / Loss: 0.0598319
Epoch: 50 / Loss: 0.0094373
Epoch: 60 / Loss: 0.0044017
Epoch: 70 / Loss: 0.0027340
Epoch: 80 / Loss: 0.0018196
Epoch: 90 / Loss: 0.0013290
