## 5.5　RNNLM の学習と評価

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Directory holding the ch05 sources. Set the DLFS2_CH05_DIR environment variable to
# run this notebook on another machine; without it the original location is used.
CH05_DIR = os.environ.get(
    'DLFS2_CH05_DIR',
    '/Users/yuta.shimizu/Downloads/ML/deep-learning-from-scratch-2-master/ch05',
)
os.chdir(CH05_DIR)

# os.pardir ('..') is resolved against the working directory set above.
# NOTE(review): presumably the parent directory is where the `dataset` and `common`
# packages imported in the next cell live - confirm against the repository layout.
# The membership check keeps sys.path free of duplicates when the cell is re-run.
if os.pardir not in sys.path:
    sys.path.append(os.pardir)

In [3]:
from dataset import ptb
from common.time_layers import *
from common.optimizer import SGD
from simple_rnnlm import SimpleRnnlm
from common.trainer import RnnlmTrainer

### 5.5.1　RNNLM の実装

In [4]:
class SimpleRnnlm:
    """Language model made of TimeEmbedding -> TimeRNN -> TimeAffine plus a loss layer.

    Attributes:
        layers: the three time layers, in forward order.
        loss_layer: TimeSoftmaxWithLoss instance applied to the last layer's output.
        rnn_layer: the TimeRNN entry of ``layers`` (target of ``reset_state``).
        params: flat list of every layer's ``params``, in layer order.
        grads: flat list of every layer's ``grads``, in layer order.
    """

    def __init__(self, vocab_size, wordvec_size, hidden_size):
        """Create the weights (cast with dtype code 'f') and assemble the layers.

        Args:
            vocab_size: V; rows of the embedding matrix, columns of the affine weight.
            wordvec_size: D; columns of the embedding matrix, rows of ``rnn_Wx``.
            hidden_size: H; side length of ``rnn_Wh``, rows of the affine weight.
        """
        randn = np.random.randn

        # The four random draws stay in this order so a seeded run yields the same weights.
        embed_W = (randn(vocab_size, wordvec_size) / 100).astype('f')
        rnn_Wx = (randn(wordvec_size, hidden_size) / np.sqrt(wordvec_size)).astype('f')
        rnn_Wh = (randn(hidden_size, hidden_size) / np.sqrt(hidden_size)).astype('f')
        affine_W = (randn(hidden_size, vocab_size) / np.sqrt(hidden_size)).astype('f')

        # Biases start at zero.
        rnn_b = np.zeros(hidden_size).astype('f')
        affine_b = np.zeros(vocab_size).astype('f')

        embedding = TimeEmbedding(embed_W)
        recurrent = TimeRNN(rnn_Wx, rnn_Wh, rnn_b, stateful=True)
        projection = TimeAffine(affine_W, affine_b)

        self.layers = [embedding, recurrent, projection]
        self.loss_layer = TimeSoftmaxWithLoss()
        self.rnn_layer = recurrent

        # Flatten the per-layer lists so the whole model can be handed to an optimizer.
        self.params = [param for layer in self.layers for param in layer.params]
        self.grads = [grad for layer in self.layers for grad in layer.grads]

    def forward(self, xs, ts):
        """Feed ``xs`` through every layer in order and return the loss against ``ts``."""
        out = xs
        for layer in self.layers:
            out = layer.forward(out)
        return self.loss_layer.forward(out, ts)

    def backward(self, dout=1):
        """Back-propagate ``dout`` through the loss layer, then the layers in reverse.

        Returns:
            Whatever the first layer's ``backward`` returns.
        """
        grad = self.loss_layer.backward(dout)
        for layer in self.layers[::-1]:
            grad = layer.backward(grad)
        return grad

    def reset_state(self):
        """Delegate to the RNN layer's ``reset_state``."""
        self.rnn_layer.reset_state()

`wordvec_size` は RNN レイヤへの入力ベクトルの次元数 `D` である。<br>
`Wx` と `Wh` の形状（それぞれ $D \times H$、$H \times H$）については、P.197 を参照する。<br>
Time Embedding レイヤでは、入力 `xs` の形状は $N \times T$、`embed_W` の形状は $V \times D$ であり、各単語 ID に対応する行ベクトルだけを抜き出すので、出力の形状は $N \times T \times D$ になる。

### 5.5.3　RNNLM の学習コード

In [5]:
# Mini-batch geometry: rows per batch and time steps per row.
batch_size = 10
time_size = 5

# Model widths handed to SimpleRnnlm.
wordvec_size = 100
hidden_size = 100

# Optimisation settings: learning rate for SGD and number of passes of the training loop.
lr = 0.1
max_epoch = 100

In [6]:
# Only the first corpus_size word IDs of the training split are used in this section.
corpus_size = 1000
corpus_full, word_to_id, id_to_word = ptb.load_data('train')
corpus = corpus_full[:corpus_size]

# Largest ID that occurs in the truncated corpus, plus one.
vocab_size = int(max(corpus) + 1)

In [7]:
# Inputs and targets are the same sequence shifted by one position.
xs, ts = corpus[:-1], corpus[1:]
data_size = len(xs)

print(data_size)
print('corpus size: %d, vocabulary size: %d' % (corpus_size, vocab_size))

999
corpus size: 1000, vocabulary size: 418


RNNLM は入力された単語の次に来る単語を予測するので、<br>
`corpus` から最後の単語を除いたものが入力 `xs`、最初の単語を除いたものが正解ラベル `ts` となる。

In [8]:
# Number of mini-batches per epoch; each one consumes batch_size * time_size positions.
max_iters = data_size // (batch_size * time_size)

# Running state read and updated by the training loop.
time_idx = 0
total_loss, loss_count = 0, 0
ppl_list = []

print(max_iters)

19


In [9]:
# Fresh model and optimizer for the hand-written training loop.
model = SimpleRnnlm(
    vocab_size=vocab_size,
    wordvec_size=wordvec_size,
    hidden_size=hidden_size,
)
optimizer = SGD(lr)

In [10]:
# Each of the batch_size rows reads the corpus from its own start position;
# the start positions are spaced `jump` apart.
jump = (corpus_size - 1) // batch_size
offsets = [i * jump for i in range(batch_size)]

for epoch in range(max_epoch):
    # The loop variable is deliberately not called `iter`, which would shadow
    # the builtin for every later cell of the notebook.
    for iteration in range(max_iters):
        # Fill one (batch_size, time_size) mini-batch. time_idx keeps advancing
        # across iterations and epochs; the modulo wraps it back into the data.
        batch_x = np.empty((batch_size, time_size), dtype='i')
        batch_t = np.empty((batch_size, time_size), dtype='i')
        for t in range(time_size):
            for i, offset in enumerate(offsets):
                pos = (offset + time_idx) % data_size
                batch_x[i, t] = xs[pos]
                batch_t[i, t] = ts[pos]
            time_idx += 1

        # One parameter update: forward pass, backward pass, optimizer step.
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        total_loss += loss
        loss_count += 1

    # Perplexity of the epoch = exp(mean loss over its mini-batches).
    ppl = np.exp(total_loss / loss_count)
    print('| epoch %d | perplexity %.2f' % (epoch+1, ppl))
    # Keep the per-epoch history; ppl_list is otherwise created but never filled.
    ppl_list.append(float(ppl))
    total_loss, loss_count = 0, 0

| epoch 1 | perplexity 406.94
| epoch 2 | perplexity 293.15
| epoch 3 | perplexity 229.88
| epoch 4 | perplexity 215.94
| epoch 5 | perplexity 205.66
| epoch 6 | perplexity 202.78
| epoch 7 | perplexity 198.69
| epoch 8 | perplexity 196.35
| epoch 9 | perplexity 191.86
| epoch 10 | perplexity 193.03
| epoch 11 | perplexity 188.99
| epoch 12 | perplexity 192.59
| epoch 13 | perplexity 190.72
| epoch 14 | perplexity 190.89
| epoch 15 | perplexity 189.58
| epoch 16 | perplexity 186.01
| epoch 17 | perplexity 184.61
| epoch 18 | perplexity 181.58
| epoch 19 | perplexity 182.43
| epoch 20 | perplexity 183.49
| epoch 21 | perplexity 182.50
| epoch 22 | perplexity 179.99
| epoch 23 | perplexity 174.73
| epoch 24 | perplexity 175.50
| epoch 25 | perplexity 173.94
| epoch 26 | perplexity 174.72
| epoch 27 | perplexity 168.58
| epoch 28 | perplexity 167.66
| epoch 29 | perplexity 163.28
| epoch 30 | perplexity 158.21
| epoch 31 | perplexity 160.08
| epoch 32 | perplexity 155.52
| epoch 33 | perp

In [11]:
# Rebuild a single mini-batch from position 0 so its layout can be inspected below.
time_idx = 0

jump = (corpus_size - 1) // batch_size
offsets = [row * jump for row in range(batch_size)]
print(jump)
print(offsets)

batch_shape = (batch_size, time_size)
batch_x = np.empty(batch_shape, dtype='i')
batch_t = np.empty(batch_shape, dtype='i')
for t in range(time_size):
    for i, offset in enumerate(offsets):
        # Row i reads from its own offset; the modulo wraps past the end of the data.
        pos = (offset + time_idx) % data_size
        batch_x[i, t] = xs[pos]
        batch_t[i, t] = ts[pos]
    time_idx += 1

99
[0, 99, 198, 297, 396, 495, 594, 693, 792, 891]


In [12]:
# Reminder of the two integer operators used above: `//` (floor division, as in the
# max_iters and jump computations) and `%` (remainder, as in the corpus index wrap).
print(7 // 3)
print(7 % 3)

2
1


In [13]:
# With data_size == 999, position 999 wraps back to index 0 and position 1000 to index 1.
print(999 % 999)
print(1000 % 999)

0
1


In [14]:
# Illustration only: which xs position fills each batch_x cell when time_idx starts at 0
# and the offsets are 0, 99, 198, ... The string is a no-op expression and is not
# displayed, because the cell's last statement is the print call.
'''
batch_x[0, 0] = xs[(0 + 0)]
batch_x[1, 0] = xs[(99 + 0)]
batch_x[2, 0] = xs[(198 + 0)]
.
.
batch_x[0, 1] = xs[(0 + 1)]
batch_x[1, 1] = xs[(99 + 1)]
batch_x[2, 1] = xs[(198 + 1)]
.
.
batch_x[0, 4] = xs[(0 + 4)]
batch_x[1, 4] = xs[(99 + 4)]
batch_x[2, 4] = xs[(198 + 4)]

'''
print('')




In [15]:
# Show the inputs and the targets, separated by an empty line.
print(batch_x, '', batch_t, sep='\n')

[[  0   1   2   3   4]
 [ 42  76  77  64  78]
 [ 26  26  98  56  40]
 [ 24  32  26 175  98]
 [208 209  80 197  32]
 [ 26  79  26  80  32]
 [274 275 276  42  61]
 [ 88 303  26 304  26]
 [ 42  35  72 350  64]
 [339 359 181 328 386]]

[[  1   2   3   4   5]
 [ 76  77  64  78  79]
 [ 26  98  56  40 128]
 [ 32  26 175  98  61]
 [209  80 197  32  82]
 [ 79  26  80  32 241]
 [275 276  42  61  24]
 [303  26 304  26  32]
 [ 35  72 350  64  27]
 [359 181 328 386 387]]


In [16]:
# The first column of batch_x should equal xs at the first four start positions.
for start in (0, 99, 198, 297):
    print(xs[start])

0
42
26
24


### 5.5.4　RNNLM の Trainer クラス

In [17]:
# Repeat the experiment through RnnlmTrainer, starting from a fresh model and optimizer.
model = SimpleRnnlm(
    vocab_size=vocab_size,
    wordvec_size=wordvec_size,
    hidden_size=hidden_size,
)
optimizer = SGD(lr)
trainer = RnnlmTrainer(model, optimizer)

# Arguments stay positional because fit()'s signature is not visible in this notebook.
trainer.fit(xs, ts, max_epoch, batch_size, time_size)

| epoch 1 |  iter 1 / 19 | time 0[s] | perplexity 420.57
| epoch 2 |  iter 1 / 19 | time 0[s] | perplexity 384.57
| epoch 3 |  iter 1 / 19 | time 0[s] | perplexity 266.54
| epoch 4 |  iter 1 / 19 | time 0[s] | perplexity 221.67
| epoch 5 |  iter 1 / 19 | time 0[s] | perplexity 212.34
| epoch 6 |  iter 1 / 19 | time 0[s] | perplexity 210.43
| epoch 7 |  iter 1 / 19 | time 0[s] | perplexity 201.79
| epoch 8 |  iter 1 / 19 | time 0[s] | perplexity 200.99
| epoch 9 |  iter 1 / 19 | time 0[s] | perplexity 195.81
| epoch 10 |  iter 1 / 19 | time 0[s] | perplexity 191.13
| epoch 11 |  iter 1 / 19 | time 0[s] | perplexity 192.31
| epoch 12 |  iter 1 / 19 | time 0[s] | perplexity 189.90
| epoch 13 |  iter 1 / 19 | time 0[s] | perplexity 193.39
| epoch 14 |  iter 1 / 19 | time 0[s] | perplexity 187.11
| epoch 15 |  iter 1 / 19 | time 0[s] | perplexity 186.39
| epoch 16 |  iter 1 / 19 | time 0[s] | perplexity 191.58
| epoch 17 |  iter 1 / 19 | time 0[s] | perplexity 189.71
| epoch 18 |  iter 1 / 