Dive into Deep Learning 

Chapter 10

Section 8

In [31]:
import d2lzh as d2l
from mxnet import gluon
from mxnet.gluon import data as gdata, loss as gloss, nn 
from mxnet import gluon, init, nd
from mxnet.contrib import text

In [6]:
# Mini-batch size handed to both DataLoaders built further down.
batch_size = 64
# Dataset fetch, left disabled here. Presumably it has to run once before
# d2l.read_imdb can find the data -- TODO confirm.
#d2l.download_imdb()

In [8]:
# Load the two IMDb splits. The preview output below shows each entry as a
# [review_text, label] pair.
train_data = d2l.read_imdb('train')
test_data = d2l.read_imdb('test')

In [13]:
# Preview the first two entries of the training split.
train_data[:2]

[['this movie is to halloween what the hilarious "christmas story" is to christmas: both are relatively low-budget, no-big-name-stars type films...and both are two of the absolute greatest and funniest movies available, both seasonal classics!!! "spaced invaders" comes galloping out right from the start with warmth and humor and a superb cast of characters...all five goofy martians, klembecker the realtor, russell the deputy, vern at the "fuel dispensing depot" and so many more! you just have to see this movie to believe it, and, like "christmas story", it just keeps getting better and better with each viewing, and you pick up on fun little things each time!! most definitely a ten!!!',
  1],
 ["i've said this in other reviews, without a story, you can give the audience all the smoke and mirrors you want, still no one will give a damn.<br /><br />the director seems to have a great eye for 30s art deco (which i love), and i think the idea of using all digital backgrounds and such could i

In [14]:
# Build the vocabulary from the training split only; the test split is not
# passed in.
vocab = d2l.get_vocab_imdb(train_data)

In [20]:
# Echo the object to check what type the helper returned.
vocab

<mxnet.contrib.text.vocab.Vocabulary at 0x7fbb249f1050>

In [40]:
# Wrap the preprocessed splits in DataLoaders. Only the training loader asks
# for shuffling; the test loader keeps the default ordering.
train_iter = gdata.DataLoader(
    gdata.ArrayDataset(*d2l.preprocess_imdb(train_data, vocab)),
    batch_size=batch_size,
    shuffle=True,
)
test_iter = gdata.DataLoader(
    gdata.ArrayDataset(*d2l.preprocess_imdb(test_data, vocab)),
    batch_size=batch_size,
)

In [49]:
class TextCNN(nn.Block):
    """Convolutional text classifier fed by two parallel embedding tables.

    Every token index is looked up in both ``embedding`` and
    ``constant_embedding``. The two vectors are concatenated and passed
    through a set of 1-D convolutions, each followed by max-over-time
    pooling. The pooled features are concatenated, run through dropout and
    projected to two output scores.

    ``constant_embedding`` is intended to stay fixed during training, but
    this class does not freeze it itself: the caller has to set its
    ``grad_req`` to ``'null'``.

    Parameters
    ----------
    vocab : sized object
        Only ``len(vocab)`` is used, as the row count of both embeddings.
    embed_size : int
        Width of each embedding vector.
    kernel_sizes : iterable of int
        Kernel width of each convolution.
    num_channels : iterable of int
        Output channels of each convolution, paired position by position
        with ``kernel_sizes``.
    **kwargs
        Forwarded to ``nn.Block.__init__``.

    Raises
    ------
    ValueError
        If ``kernel_sizes`` and ``num_channels`` differ in length.
    """

    def __init__(self, vocab, embed_size, kernel_sizes, num_channels,
                     **kwargs):
        # Materialise both specs so one-shot iterables can be measured, then
        # reject mismatched lengths: zip() would otherwise drop the surplus
        # entries silently and build fewer convolutions than requested.
        kernel_sizes = list(kernel_sizes)
        num_channels = list(num_channels)
        if len(kernel_sizes) != len(num_channels):
            raise ValueError(
                'kernel_sizes and num_channels must have the same length, '
                'got %d and %d' % (len(kernel_sizes), len(num_channels)))
        super(TextCNN, self).__init__(**kwargs)
        self.embedding = nn.Embedding(len(vocab), embed_size)
        # Embedding layer that is meant to be excluded from training.
        self.constant_embedding = nn.Embedding(len(vocab), embed_size)
        self.dropout = nn.Dropout(0.5)
        self.decoder = nn.Dense(2)
        # Max-over-time pooling has no weights, so a single instance can be
        # shared by every convolution.
        self.pool = nn.GlobalMaxPool1D()
        # Container holding one 1-D convolution per (channels, width) pair.
        self.convs = nn.Sequential()
        for channels, width in zip(num_channels, kernel_sizes):
            self.convs.add(nn.Conv1D(channels, width, activation='relu'))

    def forward(self, inputs):
        """Return the two output scores for each sequence in ``inputs``.

        ``inputs`` holds token indices; it is passed directly to both
        embedding layers.
        """
        # Both embedding outputs have shape (batch size, number of tokens,
        # embedding size); join them along the embedding axis.
        embeddings = nd.concat(
            self.embedding(inputs), self.constant_embedding(inputs), dim=2)
        # Conv1D wants the channel axis second, so move the embedding axis
        # (which serves as the convolution's channel axis) in front of the
        # token axis.
        embeddings = embeddings.transpose((0, 2, 1))
        # After max-over-time pooling each convolution yields an NDArray of
        # shape (batch size, channels, 1). flatten drops the trailing axis,
        # and the results are then concatenated along the channel axis.
        encoding = nd.concat(*[nd.flatten(
            self.pool(conv(embeddings))) for conv in self.convs], dim=1)
        # Apply dropout, then the fully connected layer produces the output.
        outputs = self.decoder(self.dropout(encoding))
        return outputs

In [50]:
# Create a TextCNN instance with three convolution layers: kernel widths
# 3, 4 and 5, each with 100 output channels.
embed_size = 100
kernel_sizes = [3, 4, 5]
num_channels = [100, 100, 100]
# Devices to initialise on; the training log further down reports [cpu(0)].
ctx = d2l.try_all_gpus()
net = TextCNN(
    vocab=vocab,
    embed_size=embed_size,
    kernel_sizes=kernel_sizes,
    num_channels=num_channels,
)
net.initialize(init.Xavier(), ctx=ctx)

In [34]:
# List the embedding families that have pretrained files registered.
text.embedding.get_pretrained_file_names().keys()

dict_keys(['glove', 'fasttext'])

In [35]:
# List the pretrained file names offered for the 'glove' family.
text.embedding.get_pretrained_file_names('glove')

['glove.42B.300d.txt',
 'glove.6B.50d.txt',
 'glove.6B.100d.txt',
 'glove.6B.200d.txt',
 'glove.6B.300d.txt',
 'glove.840B.300d.txt',
 'glove.twitter.27B.25d.txt',
 'glove.twitter.27B.50d.txt',
 'glove.twitter.27B.100d.txt',
 'glove.twitter.27B.200d.txt']

In [37]:
# 1. Load the pretrained word vectors.
# The 100-dimensional GloVe file matches the embed_size of 100 used when the
# model was built.
glove_embedding = text.embedding.create(
    embedding_name='glove',
    pretrained_file_name='glove.6B.100d.txt',
    vocabulary=vocab,
)


In [51]:
# Overwrite the weights of both embedding tables with the pretrained GloVe
# vectors.
net.embedding.weight.set_data(glove_embedding.idx_to_vec)
net.constant_embedding.weight.set_data(glove_embedding.idx_to_vec)
# Request no gradient for the second table so it keeps the pretrained values
# during training; net.embedding is left trainable.
net.constant_embedding.collect_params().setattr('grad_req','null')

In [None]:
# 2. Train the model.
lr = 0.001
num_epochs = 5
trainer = gluon.Trainer(
    params=net.collect_params(),
    optimizer='adam',
    optimizer_params={'learning_rate': lr},
)
loss = gloss.SoftmaxCrossEntropyLoss()
d2l.train(train_iter, test_iter, net, loss, trainer, ctx, num_epochs)

training on [cpu(0)]
epoch 1, loss 0.5715, train acc 0.726, test acc 0.827, time 456.1 sec
epoch 2, loss 0.3593, train acc 0.841, test acc 0.854, time 431.7 sec
epoch 3, loss 0.2631, train acc 0.893, test acc 0.860, time 435.3 sec
