# In[3]:
import torch
from torch import nn
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import json
import random

class UnderstandPassage(nn.Module):
    '''
    Question-generation head.

    The passage encodings are concatenated with the last answer encoding and
    summarised by an LSTM; a second LSTM then decodes a token sequence, either
    with teacher forcing or by feeding back its own greedy predictions.
    '''

    def __init__(self, embedding, hidden_size, dropout, voc):
        '''
        embedding    callable embedding module (e.g. nn.Embedding); it is applied to token ids in dec()
        hidden_size  width of the incoming encoders and of both LSTMs
        dropout      drop probability applied to the LSTM outputs
        voc          mapping token -> id; must contain 'pad#', and 'go#' when
                     forward() is called without decoder_inputs
        '''

        super(UnderstandPassage, self).__init__()

        self.embedding = embedding
        # The embedding is called like a module in dec(), so read its width
        # from the weight matrix (falls back to the object itself for a raw matrix).
        self.embedding_dim = getattr(embedding, 'weight', embedding).size(1)
        self.hidden_size = hidden_size
        self.voc = voc
        self.voc_size = len(voc)

        self.dropout = nn.Dropout(dropout)

        self.enc_net = nn.LSTM(input_size=hidden_size*2,
                                hidden_size=hidden_size,
                                bias=True)

        self.dec_net = nn.LSTM(input_size=self.embedding_dim+hidden_size,
                                hidden_size=hidden_size,
                                bias=True)

        self.fc_net = nn.Linear(hidden_size, self.voc_size)

        self.cost_func = self.init_cost_func()

    def init_cost_func(self):
        '''
        Cross entropy over the vocabulary in which the 'pad#' class has weight 0,
        so padded positions do not contribute to the loss.
        '''

        weight = torch.ones(self.voc_size)
        weight[self.voc['pad#']] = 0

        return nn.CrossEntropyLoss(weight=weight)

    def enc(self, passage_encoders, answer_encoders):

        '''
        passage_encoders pn_steps, batch, hidden_size
        answer_encoders batch, hidden_size

        returns batch, hidden_size (last output of the encoder LSTM, after dropout)
        '''

        # repeat the answer summary along the passage time axis
        answer_encoders = answer_encoders.expand(passage_encoders.size(0), answer_encoders.size(0), self.hidden_size)
        inputs = torch.cat([passage_encoders, answer_encoders], -1) # pn_steps, batch, hidden_size*2

        encoders, _ = self.enc_net(inputs)

        return self.dropout(encoders[-1]) # batch, hidden_size

    def dec(self, encoders, decoder_inputs, is_teacher_forcing, max_question_len):

        '''
        encoders batch, hidden_size

        if is_teacher_forcing: decoder_inputs (batch, max_question_len)
        if not is_teacher_forcing: decoder_inputs (batch, 1); only the first
        token seeds the decoder, later inputs are its own greedy predictions

        returns logits (qn_steps, batch, voc_size) and predictions, a numpy
        array of token ids (batch, qn_steps)

        raises ValueError when decoding freely with max_question_len < 1
        '''

        decoder_inputs = Variable(decoder_inputs).long()
        if encoders.is_cuda:
            # follow the device of the encoder output instead of assuming a GPU
            decoder_inputs = decoder_inputs.cuda()
        embedded = self.embedding(decoder_inputs).transpose(0, 1) # steps, batch, embedding_dim

        context = encoders.expand(embedded.size(0), encoders.size(0), self.hidden_size) # steps, batch, hidden_size

        if is_teacher_forcing:

            inputs = torch.cat([embedded, context], -1)
            outputs, _ = self.dec_net(inputs)
            outputs = self.dropout(outputs)
            logits = self.fc_net(outputs) # qn_steps, batch, voc_size

            _, predictions = torch.max(logits, -1) # qn_steps, batch
            predictions = predictions.transpose(0, 1).cpu().data.numpy() # batch, qn_steps

            return logits, predictions

        if max_question_len < 1:
            raise ValueError('max_question_len must be >= 1 when is_teacher_forcing is False')

        step_context = context[:1] # 1, batch, hidden_size
        step_input = torch.cat([embedded[:1], step_context], -1)
        hidden = None # None lets the LSTM start from a zero state

        logits = []
        predictions = []

        for _ in range(max_question_len):

            # carry the recurrent state from one step to the next
            output, hidden = self.dec_net(step_input, hidden)
            output = self.dropout(output)
            step_logits = self.fc_net(output) # 1, batch, voc_size

            _, index = torch.max(step_logits, -1) # 1, batch

            logits.append(step_logits)
            predictions.append(index)

            # the greedy choice becomes the next input token
            step_input = torch.cat([self.embedding(index), step_context], -1)

        logits = torch.cat(logits) # qn_steps, batch, voc_size
        predictions = torch.cat(predictions).transpose(0, 1).cpu().data.numpy() # batch, qn_steps

        return logits, predictions

    def forward(self, passage_encoders, answer_encoders, decoder_inputs=None, is_teacher_forcing = True, max_question_len=0):

        '''
        answer_encoders (batch, an_steps, hidden_size)
        passage_encoders (batch, pn_steps, hidden_size)

        if is_teacher_forcing: decoder_inputs (batch, max_question_len)
        if not is_teacher_forcing: decoder_inputs (None); a column of 'go#'
        tokens is created and max_question_len steps are decoded

        returns logits (qn_steps, batch, voc_size), predictions (batch, qn_steps)
        '''

        passage_encoders = passage_encoders.transpose(0, 1) # pn_steps, batch, hidden_size
        answer_encoders = answer_encoders.transpose(0, 1) # an_steps, batch, hidden_size

        # the last answer step summarises the answer
        encoders = self.enc(passage_encoders, answer_encoders[-1])

        if decoder_inputs is None:
            decoder_inputs = torch.Tensor([[self.voc['go#']]]*encoders.size(0))

        return self.dec(encoders, decoder_inputs, is_teacher_forcing, max_question_len)

    def get_loss(self, logits, labels):

        '''
        logits (qn_steps, batch, voc_size)
        labels (batch, qn_steps) token ids

        returns the pad-weighted cross entropy as a scalar
        '''

        labels = Variable(labels).long()
        if logits.is_cuda:
            labels = labels.cuda()
        labels = labels.transpose(0, 1) # qn_steps, batch, same order as logits

        logits = logits.contiguous().view(-1, logits.size(-1))
        labels = labels.contiguous().view(-1)

        loss = torch.mean(self.cost_func(logits, labels))

        return loss
    
        
        
        

# In[4]:
import torch
from torch import nn
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import json
import random


class UnderQuestion(nn.Module):
    '''
    Binary classifier over a (question, answer) pair.

    The last time step of each encoder sequence is concatenated and passed
    through one hidden layer and a 2-way output layer.
    '''

    def __init__(self, dropout, hidden_size):
        '''
        dropout      drop probability applied after the hidden layer
        hidden_size  width of the question / answer encoders
        '''

        super(UnderQuestion, self).__init__()

        self.hidden_layer = nn.Linear(hidden_size*2, hidden_size)
        self.fc_net = nn.Linear(hidden_size, 2)
        self.softmax = nn.Softmax(dim=-1)

        self.dropout = nn.Dropout(dropout)
        self.cost_func = nn.CrossEntropyLoss()


    def forward(self, question_encoders, answer_encoders):

        '''
        question_encoders  batch, qn_steps, hidden_size
        answer_encoders   batch, an_steps, hidden_size

        returns logits (batch, 2) and predictions, a numpy array (batch,) of class ids
        '''

        question_encoders = question_encoders.transpose(0, 1) #qn_steps, batch, hidden_size
        answer_encoders = answer_encoders.transpose(0, 1) #an_steps, batch, hidden_size

        question_encoders = question_encoders[-1] # batch, hidden_size
        answer_encoders = answer_encoders[-1] # batch, hidden_size

        inputs = torch.cat([question_encoders, answer_encoders], -1) # batch, hidden_size*2

        hidden_layer = self.hidden_layer(inputs)
        hidden_layer = self.dropout(hidden_layer)

        logits = self.fc_net(hidden_layer) # batch, 2
        # reduce over the class axis; without a dim torch.max returns one global maximum
        _, predictions = torch.max(logits, -1)
        predictions = predictions.cpu().data.numpy()

        return logits, predictions


    def get_loss(self, logits, labels):

        '''
        logits (batch, 2)
        labels (batch,) class ids

        returns the mean cross entropy
        '''

        labels = Variable(labels).long()
        if logits.is_cuda:
            # follow the device of the logits instead of assuming a GPU
            labels = labels.cuda()
        loss = self.cost_func(logits, labels)

        return loss

# In[5]:
import torch
from torch import nn
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import json
import random

# Absolute path of the JSON file holding the embedding matrix
# (MultiTask.init_embedding parses it with json.load).
embedding_dir = '/data/xuwenshen/workspace/squad/data/train_embedding.json'

class MatchLayer(nn.Module):
    '''
    Bidirectional match-LSTM layer.

    For every passage position an attention distribution over the question is
    computed from the passage encoding and the previous hidden state; the
    attended question vector is concatenated with the passage encoding and fed
    to an LSTM cell. One pass runs left-to-right, the other right-to-left.
    '''

    def __init__(self, hidden_size, dropout):
        '''
        hidden_size  width of the passage / question encoders and of the LSTM cells
        dropout      drop probability applied to the projected encoders
        '''

        super(MatchLayer, self).__init__()

        self.hidden_size = hidden_size

        self.dropout = nn.Dropout(dropout)
        self.fw_match_lstm = nn.LSTMCell(input_size=hidden_size*2,
                                         hidden_size=hidden_size,
                                         bias=True)

        self.bw_match_lstm = nn.LSTMCell(input_size=hidden_size*2,
                                         hidden_size=hidden_size,
                                         bias=True)

        self.whq_net = nn.Linear(hidden_size, hidden_size)
        self.whp_net = nn.Linear(hidden_size, hidden_size)
        self.whr_net = nn.Linear(hidden_size, hidden_size)
        self.w_net = nn.Linear(hidden_size, 1)

        # attention scores are (batch, qn_steps): normalise over the question axis
        self.softmax = nn.Softmax(dim=1)
        self.tanh = nn.Tanh()

    def forward(self, passage_encoders, question_encoders):

        '''
        passage_encoders (batch, pn_steps, hidden_size)
        question_encoders (batch, qn_steps, hidden_size)

        returns (pn_steps, batch, hidden_size * 2): forward and backward match
        states concatenated on the last axis
        '''

        passage_encoders = passage_encoders.transpose(0, 1) # pn_steps, batch, hidden_size
        question_encoders = question_encoders.transpose(0, 1) # qn_steps, batch, hidden_size

        wq_matrix = self.whq_net(question_encoders) # qn_steps, batch, hidden_size
        wq_matrix = self.dropout(wq_matrix)

        wp_matrix = self.whp_net(passage_encoders) # pn_steps, batch, hidden_size
        wp_matrix = self.dropout(wp_matrix)

        # forward match lstm (pn_steps, batch, hidden_size)
        fw_match = self.match(passage_encoders, question_encoders, wq_matrix, wp_matrix, fw = True)

        # backward match lstm (pn_steps, batch, hidden_size)
        bw_match = self.match(passage_encoders, question_encoders, wq_matrix, wp_matrix, fw = False)

        return torch.cat([fw_match, bw_match], -1) # (pn_steps, batch, hidden_size * 2)

    def match(self, passage_encoders, question_encoders, wq_matrix, wp_matrix, fw = True):

        '''
        passage_encoders (pn_steps, batch, hidden_size)
        question_encoders (qn_steps, batch, hidden_size)
        wq_matrix (qn_steps, batch, hidden_size)
        wp_matrix (pn_steps, batch, hidden_size)
        fw  True walks the passage left-to-right with fw_match_lstm,
            False walks it right-to-left with bw_match_lstm

        returns (pn_steps, batch, hidden_size); row i is the hidden state
        produced at passage position i, whatever the direction
        '''
        pn_steps = passage_encoders.size(0)
        batch = passage_encoders.size(1)

        if fw:
            match_lstm = self.fw_match_lstm
            positions = range(pn_steps)
        else:
            match_lstm = self.bw_match_lstm
            positions = range(pn_steps - 1, -1, -1)

        hx = Variable(torch.zeros(batch, self.hidden_size))
        cx = Variable(torch.zeros(batch, self.hidden_size))
        if passage_encoders.is_cuda:
            # keep the initial state on the same kind of device as the inputs
            hx = hx.cuda()
            cx = cx.cuda()

        # loop invariant: the question in batch-major layout for bmm
        question_batch_major = question_encoders.transpose(0, 1) # batch, qn_steps, hidden_size

        match_encoders = [None] * pn_steps

        for i in positions:

            wphp = wp_matrix[i]
            wrhr = self.whr_net(hx)

            _sum = torch.add(wphp, wrhr) # batch, hidden_size
            _sum = _sum.expand(wq_matrix.size(0), wq_matrix.size(1), self.hidden_size) # qn_steps, batch, hidden_size

            g = self.tanh(torch.add(wq_matrix, _sum)) # qn_steps, batch, hidden_size
            g = torch.transpose(g, 0, 1) # batch, qn_steps, hidden_size

            wg = self.w_net(g).squeeze(-1) # batch, qn_steps
            alpha = self.softmax(wg).view(wg.size(0), 1, wg.size(1)) # batch, 1, qn_steps

            attentionv = torch.bmm(alpha, question_batch_major) # batch, 1, hidden_size
            attentionv = attentionv.squeeze(1) # batch, hidden_size

            inp = torch.cat([passage_encoders[i], attentionv], -1) # batch, hidden_size*2

            hx, cx = match_lstm(inp, (hx, cx)) # batch, hidden_size

            match_encoders[i] = hx.view(1, hx.size(0), -1)

        return torch.cat(match_encoders)

        
class AnswerLayer(nn.Module):
    '''
    Two-step pointer layer: produces one distribution over passage positions
    for the answer start and one for the answer end.
    '''

    def __init__(self, hidden_size, dropout, passage_len):
        '''
        hidden_size  width of the pointer LSTM; match_encoders are twice as wide
        dropout      drop probability stored in self.dropout (forward() does not apply it)
        passage_len  accepted for interface compatibility; not used
        '''

        super(AnswerLayer, self).__init__()

        self.hidden_size = hidden_size
        self.pointer_lstm = nn.LSTMCell(input_size=hidden_size*2,
                                        hidden_size=hidden_size,
                                        bias=True)
        self.vh_net = nn.Linear(hidden_size*2, hidden_size)
        self.wa_net = nn.Linear(hidden_size, hidden_size)
        self.v_net = nn.Linear(hidden_size, 1)

        # scores are (batch, pn_steps): normalise over the passage axis
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(dropout)
        self.tanh = nn.Tanh()

        self.cost_func = nn.CrossEntropyLoss()

    def _point(self, vh_matrix, hidden):
        '''
        One pointer step: distribution over passage positions given the pointer state.

        vh_matrix (pn_steps, batch, hidden_size)
        hidden (batch, hidden_size)

        returns beta (batch, pn_steps), rows sum to 1
        '''
        wha = self.wa_net(hidden) # batch, hidden_size
        wha = wha.expand(vh_matrix.size(0), wha.size(0), wha.size(1)) # pn_steps, batch, hidden_size
        f = self.tanh(vh_matrix + wha) # pn_steps, batch, hidden_size
        vf = self.v_net(f.transpose(0, 1)).squeeze(-1) # batch, pn_steps

        return self.softmax(vf)

    def forward(self, match_encoders):

        '''
        match_encoders (pn_steps, batch, hidden_size*2)

        returns logits (2, batch, pn_steps): start and end distributions
        (already softmax-normalised), and prediction, a numpy array (batch, 2)
        holding the arg-max start and end positions
        '''
        vh_matrix = self.vh_net(match_encoders) # pn_steps, batch, hidden_size
        batch = match_encoders.size(1)

        h0 = Variable(torch.zeros(batch, self.hidden_size))
        c0 = Variable(torch.zeros(batch, self.hidden_size))
        if match_encoders.is_cuda:
            # keep the initial state on the same kind of device as the inputs
            h0 = h0.cuda()
            c0 = c0.cuda()

        # prediction start
        beta1 = self._point(vh_matrix, h0) # batch, pn_steps

        # beta1 is already a distribution; use it directly as the attention
        # weights (a second softmax would flatten it towards uniform)
        inp = torch.bmm(beta1.view(beta1.size(0), 1, beta1.size(1)),
                        match_encoders.transpose(0, 1)) # batch, 1, hidden_size*2
        inp = inp.squeeze(1) # batch, hidden_size*2

        h1, _ = self.pointer_lstm(inp, (h0, c0))

        # prediction end
        beta2 = self._point(vh_matrix, h1) # batch, pn_steps

        _, start = torch.max(beta1, 1)
        _, end = torch.max(beta2, 1)

        logits = torch.cat([beta1.view(1, beta1.size(0), beta1.size(1)),
                            beta2.view(1, beta2.size(0), beta2.size(1))])

        start = start.view(1, start.size(0))
        end = end.view(1, end.size(0))

        prediction = torch.cat([start, end]).transpose(0, 1).cpu().data.numpy()

        return logits, prediction
    
    
class MatchLSTM(nn.Module):
    '''
    Answer-span predictor: a MatchLayer followed by an AnswerLayer, plus the
    losses used to train them.
    '''

    def __init__(self, hidden_size, dropout, passage_len):
        '''
        hidden_size  width of the incoming passage / question encoders
        dropout      drop probability forwarded to both sub-layers
        passage_len  forwarded to AnswerLayer
        '''

        super(MatchLSTM, self).__init__()

        self.match = MatchLayer(hidden_size=hidden_size, dropout=dropout)
        self.answer = AnswerLayer(hidden_size=hidden_size, dropout=dropout, passage_len=passage_len)

        self.cost_func = None


    def forward(self, passage_encoders, question_encoders):
        '''
        Runs the match layer and then the answer layer on the encoders.

        returns whatever self.answer returns: (logits, prediction)
        '''

        match_encoders = self.match(passage_encoders, question_encoders)
        logits, prediction = self.answer(match_encoders)

        return logits, prediction

    def CrossEntropyLoss(self, logits, labels):
        '''
        logits (2, batch, pn_steps)  start scores at index 0, end scores at index 1
        labels (batch, 2)            start and end positions

        returns the average of the start and end cross entropies
        '''
        # NOTE(review): the AnswerLayer in this file returns softmax outputs, and
        # nn.CrossEntropyLoss normalises again; confirm that is intended before
        # switching get_loss to this loss.
        cost_func = nn.CrossEntropyLoss()

        labels = Variable(labels).long()
        if logits.is_cuda:
            # follow the device of the logits instead of assuming a GPU
            labels = labels.cuda()
        labels = labels.transpose(0, 1) # 2, batch
        loss = (cost_func(logits[0], labels[0])+ cost_func(logits[1], labels[1])) / 2

        return loss

    def MSELoss(self, logits, labels):
        '''
        logits (2, batch, pn_steps)
        labels (batch, 2) start and end positions

        returns the summed squared error between logits and the one-hot
        encoding of the labels
        '''

        # scatter_ needs integer indices shaped like the target with a trailing axis of 1
        ids = labels.long().transpose(0, 1)
        ids = ids.contiguous().view(ids.size(0), ids.size(1), 1) # 2, batch, 1
        one_hot = Variable(torch.zeros(logits.size(0), logits.size(1), logits.size(2)).scatter_(-1, ids, 1))
        if logits.is_cuda:
            one_hot = one_hot.cuda()

        # summed squared error, written out so it does not depend on the
        # size_average / reduction spelling of nn.MSELoss
        diff = logits - one_hot
        loss = (diff * diff).sum()

        return loss


    def get_loss(self, logits, labels):
        '''
        Training loss for the span predictor; currently the summed squared error.
        '''

        return self.MSELoss(logits, labels)
      
    

# In[ ]:
import torch
from torch import nn
from torch import autograd
from torch.autograd import Variable
import torch.nn.functional as F
import numpy as np
import json
import random

# Absolute path of the JSON file holding the embedding matrix
# (MultiTask.init_embedding parses it with json.load).
embedding_dir = '/data/xuwenshen/workspace/squad/data/train_embedding.json'
# Absolute path of the vocabulary JSON file. NOTE(review): this is a path
# string, not the loaded vocabulary — presumably a token -> id mapping; confirm.
voc = '/data/xuwenshen/workspace/squad/data/voc.json'

class PreprocessLayer(nn.Module):
    '''
    Contextual encoders: one single-layer, batch-first LSTM each for the
    passage, the question and (optionally) the answer.
    '''

    def __init__(self, hidden_size, dropout, embedding_dim):
        '''
        hidden_size    width of every LSTM output
        dropout        drop probability stored in self.dropout (forward() does not apply it)
        embedding_dim  width of the embedded inputs
        '''

        super(PreprocessLayer, self).__init__()

        # nn.LSTM only applies its `dropout` between stacked layers, so it is
        # not passed to these single-layer LSTMs (it had no effect there).
        self.passage_lstm = self._make_lstm(embedding_dim, hidden_size)
        self.question_lstm = self._make_lstm(embedding_dim, hidden_size)
        self.answer_lstm = self._make_lstm(embedding_dim, hidden_size)

        self.dropout = nn.Dropout(dropout)

    @staticmethod
    def _make_lstm(embedding_dim, hidden_size):
        '''Build one single-layer, batch-first LSTM encoder.'''
        return nn.LSTM(input_size=embedding_dim,
                       hidden_size=hidden_size,
                       num_layers=1,
                       batch_first=True)

    def forward(self, passage, question, answer = None):
        '''
        passage   batch, pn_steps, embedding_dim
        question  batch, qn_steps, embedding_dim
        answer    batch, an_steps, embedding_dim, or None

        returns (passage_encoders, question_encoders, answer_encoders), each
        batch, steps, hidden_size; answer_encoders is None when no answer is given
        '''

        passage_encoders, _ = self.passage_lstm(passage)
        question_encoders, _ = self.question_lstm(question)

        # identity test: `== None` on a tensor is an element-wise comparison
        if answer is None:
            return passage_encoders, question_encoders, None

        answer_encoders, _ = self.answer_lstm(answer)
        return passage_encoders, question_encoders, answer_encoders
    
    

class MultiTask(nn.Module):
    '''
    Multi-task reader: shared embedding and encoders feeding three heads —
    question generation, question/answer classification and answer-span matching.
    '''

    def __init__(self, hidden_size, dropout, passage_len=None):
        '''
        hidden_size  width of all encoders and task heads
        dropout      drop probability forwarded to every sub-module
        passage_len  forwarded to MatchLSTM

        Reads the embedding matrix from `embedding_dir` and the vocabulary
        from `voc` (module-level paths).
        '''

        super(MultiTask, self).__init__()

        self.hidden_size = hidden_size

        self.embedding = None
        self.embedding_dim = None
        self.init_embedding()

        # `voc` is the path of the vocabulary file; the generation head indexes
        # the vocabulary by token, so load the mapping itself.
        # NOTE(review): assumes the file holds a JSON object token -> id; confirm.
        with open(voc) as voc_file:
            self.voc = json.load(voc_file)

        self.preprocess_layer = PreprocessLayer(hidden_size, dropout, self.embedding_dim)
        self.understandpassage_task = UnderstandPassage(self.embedding, hidden_size, dropout, self.voc)
        self.understandquestion_task = UnderQuestion(dropout, hidden_size)
        self.match_task = MatchLSTM(hidden_size, dropout, passage_len)

        self.dropout = nn.Dropout(dropout)


    def init_embedding(self):
        '''
        Load the embedding matrix from `embedding_dir` into a frozen
        nn.Embedding and record its width in self.embedding_dim.
        '''

        with open(embedding_dir) as embedding_file:
            pretrained_weight = torch.Tensor(json.load(embedding_file))

        embedding = torch.nn.Embedding(num_embeddings=pretrained_weight.size()[0], embedding_dim=pretrained_weight.size()[1])
        embedding.weight = nn.Parameter(pretrained_weight)
        embedding.weight.requires_grad = False

        self.embedding_dim = pretrained_weight.size()[1]
        self.embedding = embedding


    def _embed(self, token_ids):
        '''Cast ids to long, move them next to the embedding weights and look them up.'''
        token_ids = Variable(token_ids).long()
        if self.embedding.weight.is_cuda:
            token_ids = token_ids.cuda()
        return self.embedding(token_ids)


    def preprocess(self, passage, question, answer = None):
        '''
        passage, question, answer  tensors of token ids; answer may be None

        returns (passage_encoders, question_encoders, answer_encoders) from the
        preprocess layer; answer_encoders is None when no answer is given
        '''

        passage = self._embed(passage)
        question = self._embed(question)

        if answer is not None:
            answer = self._embed(answer)

        return self.preprocess_layer(passage, question, answer)

    def forward(self, passage, question, answer, decoder_inputs, max_question_len, is_generation = True, is_classification = True,
                is_teacher_forcing = True):
        '''
        passage, question, answer  tensors of token ids (answer may be None when
                                   both optional tasks are switched off)
        decoder_inputs             inputs for the generation decoder
        max_question_len           number of steps to decode without teacher forcing
        is_generation              run the question-generation head
        is_classification          run the classification head
        is_teacher_forcing         forwarded to the generation head

        returns a dict of logits / predictions per task; entries of a task
        that was switched off are None. The match logits are available under
        both 'match_logits' and the historical key 'macth_logits'.

        raises ValueError when a task that needs the answer is requested
        without one
        '''

        passage_encoders, question_encoders, answer_encoders = self.preprocess(passage, question, answer)

        if answer_encoders is None and (is_generation or is_classification):
            raise ValueError('answer is required when is_generation or is_classification is True')

        generation_logits = None
        generation_predictions = None
        classification_logits = None
        classification_predictions = None

        if is_generation:
            generation_logits, generation_predictions = self.understandpassage_task(passage_encoders, answer_encoders,
                                                                                    decoder_inputs,
                                                                                    is_teacher_forcing, max_question_len)

        if is_classification:
            classification_logits, classification_predictions = self.understandquestion_task(question_encoders, answer_encoders)

        match_logits, match_predictions = self.match_task(passage_encoders, question_encoders)

        return {'generation_logits':generation_logits,
                'generation_predictions':generation_predictions,
                'classification_logits':classification_logits,
                'classification_predictions':classification_predictions,
                'macth_logits':match_logits,
                'match_logits':match_logits,
                'match_predictions':match_predictions}

    def get_loss(self, match_logits, match_labels, generation_logits, generation_labels, classification_logits,
                 classification_labels, is_generation = True, is_classification = True):
        '''
        Compute the loss of every active task from its logits and labels.

        returns a dict with 'generation_loss', 'match_loss' and
        'classification_loss'; the loss of a task that was switched off is None
        '''

        generation_loss = None
        classification_loss = None

        if is_generation:
            generation_loss = self.understandpassage_task.get_loss(generation_logits, generation_labels)
        if is_classification:
            classification_loss = self.understandquestion_task.get_loss(classification_logits, classification_labels)

        match_loss = self.match_task.get_loss(match_logits, match_labels)

        return {'generation_loss':generation_loss,
                'match_loss':match_loss,
                'classification_loss':classification_loss}
    
    