# 6章 ゲート付きRNN

## 6.4 LSTMを使った言語モデル

In [3]:
import sys
sys.path.append('..')
from common.time_layers import *
from common.base_model import BaseModel


class Rnnlm(BaseModel):
    """Language model built from an embedding, a stateful LSTM and an affine layer.

    ``predict`` runs the three ``Time*`` layers in sequence; ``forward`` passes
    the resulting scores and the targets to a ``TimeSoftmaxWithLoss`` layer.
    The parameters and gradients of every layer are gathered into the flat
    lists ``self.params`` and ``self.grads``.
    """

    def __init__(self, vocab_size=10000, wordvec_size=100, hidden_size=100):
        """Create the initial weights, the layer stack and the parameter lists.

        Args:
            vocab_size: Number of rows of the embedding matrix and number of
                columns of the output (affine) weight matrix.
            wordvec_size: Number of columns of the embedding matrix.
            hidden_size: Hidden dimension; the LSTM weight matrices have
                ``4 * hidden_size`` columns.
        """
        randn = np.random.randn
        # Width of the LSTM weight matrices (presumably the four gate blocks
        # laid side by side -- confirm against TimeLSTM).
        lstm_cols = 4 * hidden_size

        # Initialise the weights. The random draws are made in the same order
        # as before (embedding, LSTM input, LSTM hidden, affine) so that a
        # seeded run produces the same values. Everything is cast to
        # float32 ('f').
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        lstm_Wx = (randn(wordvec_size, lstm_cols)
                   / np.sqrt(wordvec_size)).astype('f')
        lstm_Wh = (randn(hidden_size, lstm_cols)
                   / np.sqrt(hidden_size)).astype('f')
        lstm_b = np.zeros(lstm_cols).astype('f')
        affine_W = (randn(hidden_size, vocab_size)
                    / np.sqrt(hidden_size)).astype('f')
        affine_b = np.zeros(vocab_size).astype('f')

        # Build the layers.
        embedding = TimeEmbedding(embed_W)
        lstm = TimeLSTM(lstm_Wx, lstm_Wh, lstm_b, stateful=True)
        affine = TimeAffine(affine_W, affine_b)

        self.layers = [embedding, lstm, affine]
        self.loss_layer = TimeSoftmaxWithLoss()
        # Direct handle on the LSTM layer, used by reset_state().
        self.lstm_layer = lstm

        # Collect every layer's weights and gradients into flat lists.
        all_params, all_grads = [], []
        for layer in self.layers:
            all_params.extend(layer.params)
            all_grads.extend(layer.grads)
        self.params, self.grads = all_params, all_grads

    def predict(self, xs):
        """Feed ``xs`` through every layer in order and return the final output."""
        out = xs
        for layer in self.layers:
            out = layer.forward(out)
        return out

    def forward(self, xs, ts):
        """Return the loss layer's output for inputs ``xs`` and targets ``ts``."""
        return self.loss_layer.forward(self.predict(xs), ts)

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss layer, then the layers in reverse.

        Returns whatever the first layer's ``backward`` returns.
        """
        grad = self.loss_layer.backward(dout)
        for layer in self.layers[::-1]:
            grad = layer.backward(grad)
        return grad

    def reset_state(self):
        """Delegate to the LSTM layer's ``reset_state``."""
        self.lstm_layer.reset_state()

### PTBデータセットの学習

In [1]:
import sys
sys.path.append('..')
from common.optimizer import SGD
from common.trainer import RnnlmTrainer
from common.util import eval_perplexity
from dataset import ptb
from rnnlm import Rnnlm


# Hyperparameter settings
batch_size = 20
wordvec_size = 100
hidden_size = 100  # number of elements in the RNN hidden-state vector
time_size = 35  # size over which the RNN is unrolled
lr = 20.0
max_epoch = 4
max_grad = 0.25

# Load the training data (and the test split for the final evaluation)
corpus, word_to_id, id_to_word = ptb.load_data('train')
corpus_test, _, _ = ptb.load_data('test')
vocab_size = len(word_to_id)
# Inputs are every element but the last; targets are the same sequence
# shifted by one, so ts[i] is the element that follows xs[i].
xs = corpus[:-1]
ts = corpus[1:]

# Create the model, the optimizer and the trainer
model = Rnnlm(vocab_size, wordvec_size, hidden_size)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# Train with gradient clipping applied
# (max_grad is presumably the clipping threshold -- confirm in RnnlmTrainer.fit)
trainer.fit(xs, ts, max_epoch, batch_size, time_size, max_grad,
            eval_interval=20)
trainer.plot(ylim=(0, 500))

# Evaluate on the test data; the model state is reset first
model.reset_state()
ppl_test = eval_perplexity(model, corpus_test)
print('test perplexity: ', ppl_test)

# Save the parameters
model.save_params()


Downloading ptb.test.txt ... 
Done
| epoch 1 |  iter 1 / 1327 | time 0[s] | perplexity 10002.40
| epoch 1 |  iter 21 / 1327 | time 6[s] | perplexity 3133.47
| epoch 1 |  iter 41 / 1327 | time 13[s] | perplexity 1200.75
| epoch 1 |  iter 61 / 1327 | time 21[s] | perplexity 975.10
| epoch 1 |  iter 81 / 1327 | time 28[s] | perplexity 805.50
| epoch 1 |  iter 101 / 1327 | time 36[s] | perplexity 662.09
| epoch 1 |  iter 121 / 1327 | time 45[s] | perplexity 642.76
| epoch 1 |  iter 141 / 1327 | time 53[s] | perplexity 592.91
| epoch 1 |  iter 161 / 1327 | time 61[s] | perplexity 594.51
| epoch 1 |  iter 181 / 1327 | time 69[s] | perplexity 585.68
| epoch 1 |  iter 201 / 1327 | time 78[s] | perplexity 505.94
| epoch 1 |  iter 221 / 1327 | time 86[s] | perplexity 491.61
| epoch 1 |  iter 241 / 1327 | time 94[s] | perplexity 442.68
| epoch 1 |  iter 261 / 1327 | time 103[s] | perplexity 457.40
| epoch 1 |  iter 281 / 1327 | time 111[s] | perplexity 447.76
| epoch 1 |  iter 301 / 1327 | time 1

KeyboardInterrupt: 