In [9]:
from mxnet.gluon import data as gdata 
from mxnet.gluon import loss as gloss 
from mxnet.gluon import utils as gutils
from mxnet.gluon import nn, rnn
from mxnet import gluon, init, nd
import pickle
import d2lzh as d2l


In [21]:
class BiRNN(nn.Block):
    """Bidirectional LSTM classifier over sequences of token indices.

    Token indices are embedded, passed through a multi-layer bidirectional
    LSTM, and the encoder outputs at the first and last time steps are
    concatenated and fed to a 2-unit dense output layer.

    Parameters
    ----------
    vocab : sized object
        Only ``len(vocab)`` is used, as the number of rows in the embedding
        table.
    embed_size : int
        Width of each embedding vector; also the LSTM ``input_size``.
    num_hiddens : int
        Number of hidden units per direction in the LSTM.
    num_layers : int
        Number of stacked LSTM layers.
    **kwargs
        Forwarded unchanged to ``nn.Block.__init__``.
    """

    def __init__(self, vocab, embed_size, num_hiddens, num_layers, **kwargs):
        super().__init__(**kwargs)
        vocab_size = len(vocab)
        self.embedding = nn.Embedding(vocab_size, embed_size)
        # bidirectional=True turns the LSTM into a bidirectional recurrent
        # network.
        self.encoder = rnn.LSTM(
            num_hiddens,
            num_layers=num_layers,
            bidirectional=True,
            input_size=embed_size,
        )
        self.decoder = nn.Dense(2)

    def forward(self, inputs):
        """Compute the 2-way output scores for a batch of sequences.

        ``inputs`` has shape (batch size, number of words). The return value
        is the output of the dense layer applied to the concatenated first
        and last encoder outputs.
        """
        # The LSTM expects the sequence axis first, so the batch is transposed
        # before the embedding lookup; the result has shape
        # (number of words, batch size, embedding width).
        token_features = self.embedding(inputs.T)
        # Only the embeddings are passed to rnn.LSTM (no initial state), so it
        # returns just the last layer's hidden states at every time step, with
        # shape (number of words, batch size, 2 * num_hiddens).
        hidden_states = self.encoder(token_features)
        # Join the hidden states of the first and last time steps to form the
        # dense-layer input of shape (batch size, 4 * num_hiddens).
        first_step = hidden_states[0]
        last_step = hidden_states[-1]
        return self.decoder(nd.concat(first_step, last_step))

In [6]:
# Read the 'train' and 'test' splits with the d2lzh helper
# (presumably lists of (review, label) pairs -- confirm against d2lzh).
train_data = d2l.read_imdb('train')
test_data = d2l.read_imdb('test')

In [7]:
# Load the cached GloVe embedding object from disk.
# NOTE(security): pickle.load can execute arbitrary code during
# deserialization -- only load this cache if it comes from a trusted source.
# The context manager ensures the file handle is closed once loading finishes
# (the bare open(...) call previously leaked it).
with open('../pkl/glove_embedding.pkl', 'rb') as embedding_file:
    glove_embedding = pickle.load(embedding_file)

In [8]:
# Derive the vocabulary from the training split only (test data is not used).
# NOTE(review): presumably a token -> index vocabulary; the pretrained vectors
# copied into the embedding layer later must be indexed consistently with it
# -- confirm against how the pickled GloVe object was created.
vocab = d2l.get_vocab_imdb(train_data)

In [24]:
# Wrap the preprocessed splits in mini-batch iterators.
batch_size = 64
# preprocess_imdb returns the arrays that are unpacked into the ArrayDataset
# (presumably padded token indices and labels -- confirm against d2lzh).
train_set = gdata.ArrayDataset(*d2l.preprocess_imdb(train_data, vocab))
# Training batches are reshuffled every epoch.
train_iter = gdata.DataLoader(train_set, batch_size, shuffle=True)
test_set = gdata.ArrayDataset(*d2l.preprocess_imdb(test_data, vocab))
# Evaluation does not depend on sample order, so the test loader keeps the
# default (no shuffling); this keeps evaluation deterministic and avoids
# needless random sampling work.
test_iter = gdata.DataLoader(test_set, batch_size)

In [22]:
# Model hyperparameters, one per line.
embed_size = 100   # must equal the width of the pretrained vectors loaded later
num_hiddens = 100  # hidden units per LSTM direction
num_layers = 2     # stacked LSTM layers
# Device list to initialize and train on, as chosen by the d2lzh helper.
ctx = d2l.try_all_gpus()

# Build the classifier and initialize its parameters with Xavier init.
net = BiRNN(vocab, embed_size, num_hiddens, num_layers)
net.initialize(init.Xavier(), ctx=ctx)

In [23]:
# Overwrite the embedding weights with the pretrained vectors, then freeze the
# layer: a grad_req of 'null' means no gradient is computed for these
# parameters, so they are not updated during training.
pretrained_vectors = glove_embedding.idx_to_vec
embedding_layer = net.embedding
embedding_layer.weight.set_data(pretrained_vectors)
embedding_layer.collect_params().setattr('grad_req', 'null')

In [25]:
# Optimization settings.
lr = 0.01
num_epochs = 5

# Adam optimizer over all parameters of the network, with softmax
# cross-entropy as the training loss.
optimizer_params = {'learning_rate': lr}
trainer = gluon.Trainer(net.collect_params(), 'adam', optimizer_params)
loss = gloss.SoftmaxCrossEntropyLoss()

# Run the d2lzh training loop; it prints per-epoch loss, accuracy and timing.
d2l.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)

training on [cpu(0)]
epoch 1, loss 0.5172, train acc 0.735, test acc 0.821, time 3105.5 sec
epoch 2, loss 0.3971, train acc 0.827, test acc 0.834, time 2931.8 sec
epoch 3, loss 0.3545, train acc 0.851, test acc 0.845, time 2724.7 sec
epoch 4, loss 0.3204, train acc 0.868, test acc 0.834, time 1737.8 sec
epoch 5, loss 0.2875, train acc 0.884, test acc 0.844, time 1423.0 sec
