In [1]:
#coding:utf-8
import torch as t
import numpy as np
from torch import nn
from torch.utils.data import DataLoader
import ipdb

In [2]:
import sys
sys.path.append('../../ATAE-LSTM')
import Ipynb_importer
from config import opt
from data.Embedding import Emb
from models.BasicModule import BasicModule
from data.AspClas import AspClas

importing Jupyter notebook from ../../ATAE-LSTM/data/Embedding.ipynb
importing Jupyter notebook from ../../ATAE-LSTM/models/BasicModule.ipynb
importing Jupyter notebook from ../../ATAE-LSTM/data/AspClas.ipynb


In [3]:
class ATAE_LSTM(BasicModule):
    """LSTM sentiment classifier with aspect embeddings and attention.

    Every word embedding is concatenated with the (mean-pooled) aspect-term
    embedding before being fed to a single-layer LSTM. An attention
    distribution over the LSTM outputs is then computed, and the attended
    summary is combined with the hidden state of the last non-padding token
    to produce class probabilities.

    Hyper-parameters are read from the module-level ``opt`` object:
    ``hidden_size``, ``batch_size``, ``max_seq_len``, ``classes``,
    ``epsilon`` (half-width of the uniform init range), ``use_layerNorm``
    and ``use_myAttentionMechanism``.

    NOTE(review): the LayerNorm layers are built with a normalized shape that
    includes the batch (and sequence) dimension. Their shapes are kept as-is
    so existing checkpoints still load, but it means that with
    ``opt.use_layerNorm`` enabled ``forward`` only accepts inputs of exactly
    ``opt.batch_size`` x ``opt.max_seq_len``. Without layer norm, any batch
    size up to ``opt.batch_size`` is accepted.
    """

    def __init__(self, emb):
        """Build all layers.

        Args:
            emb: object exposing ``_make_layer_()``, which returns the
                embedding layer used for both sentence and aspect tokens.
        """
        super(ATAE_LSTM, self).__init__()

        self.embedding = emb._make_layer_()

        # Input at each step is [word embedding ; aspect embedding].
        self.lstm = nn.LSTM(opt.hidden_size*2, opt.hidden_size, batch_first=True)
        for param in self.lstm.parameters():
            nn.init.uniform_(param, -opt.epsilon, opt.epsilon)

        # Learnable initial states, laid out as
        # [num_layers, batch_size, hidden_size]. A single random vector is
        # broadcast over the batch axis; .clone() materialises the broadcast
        # so the parameter does not alias one memory location per batch row
        # (in-place updates on an expanded view are not safe).
        self.h0 = t.nn.Parameter(
            self._uniform_tensor([1, 1, opt.hidden_size])
            .expand(1, opt.batch_size, opt.hidden_size)
            .clone()
        )
        self.c0 = t.nn.Parameter(
            self._uniform_tensor([1, 1, opt.hidden_size])
            .expand(1, opt.batch_size, opt.hidden_size)
            .clone()
        )

        # Projection of the LSTM outputs used by the attention score.
        self.Wh = t.nn.Parameter(
            self._uniform_tensor([opt.hidden_size, opt.hidden_size])
        )
        self.LayerNorm_Wh = nn.LayerNorm([opt.batch_size, opt.max_seq_len, opt.hidden_size])

        # Projection of the aspect embedding used by the attention score.
        self.Wv = t.nn.Parameter(
            self._uniform_tensor([opt.hidden_size, opt.hidden_size])
        )
        self.LayerNorm_Wv = nn.LayerNorm([opt.batch_size, opt.max_seq_len, opt.hidden_size])

        # Maps tanh([Wh*out ; Wv*aspect]) to one attention logit per token.
        self.omega = t.nn.Parameter(
            self._uniform_tensor([opt.hidden_size*2, 1])
        )

        self.LayerNorm_alpha = nn.LayerNorm([opt.batch_size, opt.max_seq_len, 1])

        # Projections of the attended summary (Wp) and last hidden state (Wx).
        self.Wp = t.nn.Parameter(
            self._uniform_tensor([opt.hidden_size, opt.hidden_size])
        )
        self.Wx = t.nn.Parameter(
            self._uniform_tensor([opt.hidden_size, opt.hidden_size])
        )
        self.LayerNorm_summary = nn.LayerNorm([opt.batch_size, opt.hidden_size])

        self.lin = nn.Linear(opt.hidden_size, opt.classes)
        for param in self.lin.parameters():
            nn.init.uniform_(param, -opt.epsilon, opt.epsilon)

        if opt.use_myAttentionMechanism:
            # Bilinear attention: score = out @ P @ aspect.
            # The attribute name (including its spelling) is a state_dict key
            # and is therefore left unchanged.
            self.myPrjoectionMatrix = t.nn.Parameter(
                t.eye(opt.hidden_size)*opt.epsilon
            )
            self.model_name += '_usMyAttentionMechanism'

    @staticmethod
    def _uniform_tensor(shape):
        """Return a tensor of ``shape`` sampled uniformly from [-opt.epsilon, opt.epsilon)."""
        return t.Tensor(np.random.uniform(-opt.epsilon, opt.epsilon, shape))

    def forward(self, sentence, term, returnAttention=False):
        """Classify a batch of (sentence, aspect term) pairs.

        Args:
            sentence: integer token ids, [batch_size, max_seq_len]; id 0 is
                treated as padding.
            term: integer token ids of the aspect term,
                [batch_size, max_term_len]; id 0 is treated as padding and
                the valid tokens are taken to be the leading ones.
            returnAttention: when True, also return the attention weights.

        Returns:
            ``y`` of shape [batch_size, num_classes] holding class
            probabilities (softmax is already applied), or the tuple
            ``(y, alpha)`` with ``alpha`` of shape [batch_size, 1, max_seq_len]
            when ``returnAttention`` is True.
        """
        # Use the real input sizes instead of opt.batch_size/opt.max_seq_len
        # so that a shorter final batch (or a batch of one) does not break
        # the reshapes below.
        batch_size, seq_len = sentence.size(0), sentence.size(1)

        # Large negative bias on padding positions so that softmax gives them
        # (almost) zero attention. [batch_size, max_seq_len]
        sentence_attention_mask = (sentence == 0).float() * -10000

        sentence_embeddings = self.embedding(sentence)
        # [batch_size, max_seq_len, hidden_size]

        # Mean-pool the embeddings of the leading non-padding term tokens.
        term_embeddings = []
        for row in range(term.size(0)):
            # Clamp to 1 so an all-padding term yields the padding embedding
            # instead of the NaN produced by averaging an empty slice.
            valid_length = int((term[row] != 0).sum().clamp(min=1))
            term_embeddings.append(
                self.embedding(term[row, :valid_length]).mean(dim=0, keepdim=True)
            )
        aspect_embeddings = t.cat(term_embeddings, dim=0)
        # [batch_size, hidden_size]

        e1 = sentence_embeddings
        e2 = aspect_embeddings.unsqueeze(1).expand(-1, seq_len, -1)
        # e1, e2: [batch_size, max_seq_len, hidden_size]

        wv = t.cat((e1, e2), dim=-1)
        # [batch_size, max_seq_len, 2*hidden_size]

        # Slice the learned initial states down to the actual batch size.
        h0 = self.h0[:, :batch_size, :].contiguous()
        c0 = self.c0[:, :batch_size, :].contiguous()
        out, _ = self.lstm(wv, (h0, c0))
        # out: [batch_size, max_seq_len, hidden_size]

        # Hidden state at the last non-padding token of every sentence.
        # (An all-padding sentence gives index -1, i.e. the final time step.)
        N_valid = (sentence != 0).sum(dim=1).long()
        batch_index = t.arange(batch_size, device=out.device)
        h = out[batch_index, N_valid - 1]
        # [batch_size, hidden_size]

        if opt.use_myAttentionMechanism:
            alpha_ = t.matmul(
                t.matmul(out, self.myPrjoectionMatrix),
                aspect_embeddings.unsqueeze(dim=2)
            )
            # [batch_size, max_seq_len, 1]
        else:
            Wh_out = t.matmul(out, self.Wh)
            if opt.use_layerNorm:
                Wh_out = self.LayerNorm_Wh(Wh_out)
            # [batch_size, max_seq_len, hidden_size]

            Wv_aspect = t.matmul(
                aspect_embeddings, self.Wv
            ).unsqueeze(1).expand(-1, seq_len, -1)
            if opt.use_layerNorm:
                Wv_aspect = self.LayerNorm_Wv(Wv_aspect)
            # [batch_size, max_seq_len, hidden_size]

            vh = t.cat((Wh_out, Wv_aspect), dim=2)
            # [batch_size, max_seq_len, 2*hidden_size]

            M = t.tanh(vh)
            # [batch_size, max_seq_len, 2*hidden_size]

            alpha_ = t.matmul(M, self.omega)
            # [batch_size, max_seq_len, 1]

        if opt.use_layerNorm:
            alpha_ = self.LayerNorm_alpha(alpha_)
        alpha = nn.functional.softmax(
            alpha_ + sentence_attention_mask.unsqueeze(dim=2), dim=1
        ).view(batch_size, 1, seq_len)
        # [batch_size, 1, max_seq_len]

        r = t.matmul(alpha, out)
        # [batch_size, 1, hidden_size]

        # squeeze(1) drops only the singleton attention axis; a bare squeeze()
        # would also drop the batch axis when batch_size == 1.
        summary = t.matmul(r.squeeze(1), self.Wp) + t.matmul(h, self.Wx)
        if opt.use_layerNorm:
            summary = self.LayerNorm_summary(summary)
        #_h_ = t.tanh(summary)
        _h_ = t.nn.functional.leaky_relu(summary)
        # [batch_size, hidden_size]

        y = nn.functional.softmax(self.lin(_h_), dim=1)
        # [batch_size, num_classes]

        if returnAttention:
            return y, alpha
        return y

In [4]:
if __name__ == '__main__':
    # Fixtures for the smoke test in the next cell: the test split and a
    # shuffling loader that yields batches of opt.batch_size samples.
    testDataset = AspClas(opt.test_data_root)
    testDataLoader = DataLoader(
        dataset=testDataset,
        batch_size=opt.batch_size,
        shuffle=True,
    )

100%|██████████| 100/100 [00:00<00:00, 399457.52it/s]
100%|██████████| 100000/100000 [00:00<00:00, 271214.25it/s]
100%|██████████| 100000/100000 [00:06<00:00, 14345.41it/s]

Embedding : successfully input 100000 pretrained word embeddings while 0 failed





In [5]:
if __name__=='__main__':
    %pdb on
    model = ATAE_LSTM(testDataset.emb).cuda()
    sentence, terms, label = list(testDataLoader)[0]
    print(sentence)
    print(terms)
    print(label)
    y, attention = model(sentence.cuda(), terms.cuda(), returnAttention=True)
    print(y)
    print(attention.tolist())

Automatic pdb calling has been turned ON
tensor([[144,   3, 145,  ...,   0,   0,   0],
        [ 61,  62,   9,  ...,   0,   0,   0],
        [144,   3, 145,  ...,   0,   0,   0],
        ...,
        [  3,  21,  29,  ...,   0,   0,   0],
        [128,   1,  58,  ...,   0,   0,   0],
        [  3,  75,  11,  ...,   0,   0,   0]])
tensor([[145,   0,   0,  ...,   0,   0,   0],
        [ 63,   0,   0,  ...,   0,   0,   0],
        [146,   0,   0,  ...,   0,   0,   0],
        ...,
        [ 21,  29,   0,  ...,   0,   0,   0],
        [  1,   0,   0,  ...,   0,   0,   0],
        [ 75,   0,   0,  ...,   0,   0,   0]])
tensor([[2],
        [2],
        [2],
        [0],
        [2],
        [0],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [0],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
        [2],
  

Layer Normalization:<br>
$y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta$