In [23]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

import re
import os
import time
import numpy as np
import io
import math

import torch

from mxnet import autograd, nd
from mxnet.gluon import loss as gloss

import d2lzh as d2l

In [3]:
def load_lyrics(f_name, encoding='gbk'):
    '''Load a preprocessed lyrics file and build a character-level vocabulary.

    The file ``f_name + ".txt"`` is read, lower-cased, and every newline is
    replaced by a space before the characters are indexed.

    Args:
        f_name: path of the preprocessed lyric file, without the ".txt" suffix.
        encoding: text encoding used to decode the file. Defaults to 'gbk',
            the encoding this function has always used.

    Returns:
        corpus_indices: list of int, the corpus as a sequence of character indices.
        char_to_idx: dict mapping each character to its index.
        idx_to_char: list mapping each index back to its character.
        vocab_size: int, the number of distinct characters.
    '''
    # The context manager closes the handle even if reading or decoding fails.
    with open(f_name + ".txt", 'r', encoding=encoding) as el_file:
        text = el_file.read().lower()
    corpus_chars = text.replace('\n', ' ')
    # Sort the vocabulary so the char <-> index mapping is the same on every
    # run; plain set iteration order for strings varies between processes.
    idx_to_char = sorted(set(corpus_chars))
    char_to_idx = {char: i for i, char in enumerate(idx_to_char)}
    vocab_size = len(char_to_idx)
    corpus_indices = [char_to_idx[char] for char in corpus_chars]
    return corpus_indices, char_to_idx, idx_to_char, vocab_size

In [4]:
# Load the lyrics corpus from 'hoshino2.txt' and keep the index sequence plus
# both vocabulary mappings as notebook-level globals used by the cells below.
corpus_indices, char_to_idx, idx_to_char, vocab_size = load_lyrics('hoshino2')

In [71]:
# Show the number of distinct characters found in the corpus.
vocab_size

1068

In [31]:
# Small provisional sizes; both names are reassigned before the training run.
batch_size = 5
num_steps = 5

In [40]:
def to_onehot(X, size):
    '''One-hot encode ``X`` slice by slice along its transposed first axis.

    Args:
        X: NDArray of indices; iterated as ``X.T``, so one entry is produced
            per column of ``X``.
        size: depth of the one-hot encoding passed to ``nd.one_hot``.

    Returns:
        list of NDArrays, one ``nd.one_hot`` result per element of ``X.T``.
    '''
    encoded = []
    for column in X.T:
        encoded.append(nd.one_hot(column, size))
    return encoded

In [44]:
# Layer sizes: inputs and outputs both span the vocabulary, with a
# 256-unit hidden layer in between.
num_inputs = vocab_size
num_hiddens = 256
num_outputs = vocab_size

In [28]:
def get_params():
    '''Create the RNN parameters and allocate gradient buffers for them.

    Reads the notebook-level globals ``num_inputs``, ``num_hiddens``,
    ``num_outputs`` and ``ctx``.

    Returns:
        list ``[W_xh, W_hh, b_h, W_hq, b_q]``: weights drawn from a normal
        distribution with scale 0.01, biases initialised to zero, each with
        ``attach_grad()`` already called.
    '''
    def _gaussian(shape):
        # Small random weights placed on the global context.
        return nd.random.normal(scale=0.01, shape=shape, ctx=ctx)

    # Hidden-layer parameters.
    weight_xh = _gaussian((num_inputs, num_hiddens))
    weight_hh = _gaussian((num_hiddens, num_hiddens))
    bias_h = nd.zeros(num_hiddens, ctx=ctx)

    # Output-layer parameters.
    weight_hq = _gaussian((num_hiddens, num_outputs))
    bias_q = nd.zeros(num_outputs, ctx=ctx)

    all_params = [weight_xh, weight_hh, bias_h, weight_hq, bias_q]
    for tensor in all_params:
        tensor.attach_grad()
    return all_params

In [29]:
def init_rnn_state(batch_size, num_hiddens, ctx):
    '''Return the initial RNN state as a one-element tuple.

    The single element is an all-zero NDArray of shape
    ``(batch_size, num_hiddens)`` created on ``ctx``.
    '''
    initial_hidden = nd.zeros(shape=(batch_size, num_hiddens), ctx=ctx)
    return (initial_hidden,)

In [49]:
def rnn(inputs, state, params):
    '''Run a tanh RNN over a sequence of per-step inputs.

    Args:
        inputs: iterable of per-step input NDArrays.
        state: one-element tuple holding the incoming hidden state.
        params: sequence ``(W_xh, W_hh, b_h, W_hq, b_q)``.

    Returns:
        (outputs, (H,)): the list of per-step output NDArrays and the final
        hidden state wrapped in a one-element tuple.
    '''
    W_xh, W_hh, b_h, W_hq, b_q = params
    (hidden,) = state
    step_outputs = []
    for step_input in inputs:
        # New hidden state from the current input and the previous state.
        pre_activation = nd.dot(step_input, W_xh) + nd.dot(hidden, W_hh) + b_h
        hidden = nd.tanh(pre_activation)
        # Project the hidden state to output scores.
        step_outputs.append(nd.dot(hidden, W_hq) + b_q)
    return step_outputs, (hidden,)

In [30]:
def predict_rnn(prefix, num_chars, rnn, params, init_rnn_state,
                num_hiddens, vocab_size, ctx, idx_to_char, char_to_idx):
    '''Generate text that continues ``prefix`` using greedy decoding.

    The model is first fed the prefix characters one at a time (its
    predictions are ignored during this phase), then for the remaining steps
    the arg-max prediction is appended and fed back in as the next input.

    Args:
        prefix: non-empty string whose characters are all keys of ``char_to_idx``.
        num_chars: number of characters to generate after the prefix.
        rnn: step function called as ``rnn(inputs, state, params)``.
        params: model parameters forwarded to ``rnn``.
        init_rnn_state: factory called as ``init_rnn_state(1, num_hiddens, ctx)``.
        num_hiddens: hidden size forwarded to ``init_rnn_state``.
        vocab_size: one-hot depth for the inputs.
        ctx: device context for the input arrays and state.
        idx_to_char: index -> character lookup.
        char_to_idx: character -> index lookup.

    Returns:
        str: the prefix followed by the generated characters.
    '''
    state = init_rnn_state(1, num_hiddens, ctx)
    indices = [char_to_idx[prefix[0]]]
    last_prefix_pos = len(prefix) - 1
    for step in range(num_chars + last_prefix_pos):
        # Feed the most recent character as a batch of size one.
        step_input = to_onehot(nd.array([indices[-1]], ctx=ctx), vocab_size)
        scores, state = rnn(step_input, state, params)
        if step < last_prefix_pos:
            # Still consuming the prefix: use the known next character.
            next_idx = char_to_idx[prefix[step + 1]]
        else:
            # Past the prefix: take the highest-scoring character.
            next_idx = int(scores[0].argmax(axis=1).asscalar())
        indices.append(next_idx)
    return ''.join(idx_to_char[i] for i in indices)

In [31]:
def grad_clipping(params, theta, ctx):
    '''Rescale all gradients in place so their joint L2 norm is at most ``theta``.

    Args:
        params: iterable of NDArrays whose ``.grad`` buffers are populated.
        theta: maximum allowed global gradient norm.
        ctx: device context on which the norm accumulator is created.
    '''
    squared_total = nd.array([0], ctx)
    for tensor in params:
        squared_total += (tensor.grad ** 2).sum()
    global_norm = squared_total.sqrt().asscalar()
    if not global_norm > theta:
        # Already within the allowed norm; leave the gradients untouched.
        return
    for tensor in params:
        tensor.grad[:] *= theta / global_norm

In [56]:
def train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                          vocab_size, ctx, corpus, idx_to_char, char_to_idx, is_random_iter, num_epochs,
                          num_steps, lr, clipping_theta, batch_size, pred_period, pred_len, prefixes):
    '''Train the character-level RNN and periodically print generated samples.

    Args:
        rnn: step function called as ``rnn(inputs, state, params)``.
        get_params: zero-argument factory returning the list of parameters.
        init_rnn_state: factory called as ``init_rnn_state(batch_size, num_hiddens, ctx)``.
        num_hiddens: hidden-state size.
        vocab_size: one-hot depth / number of output classes.
        ctx: device context.
        corpus: sequence of character indices to train on.
        idx_to_char: index -> character lookup (used for sampling).
        char_to_idx: character -> index lookup (used for sampling).
        is_random_iter: if true use ``d2l.data_iter_random`` and reset the
            state for every mini-batch; otherwise use
            ``d2l.data_iter_consecutive`` and carry the state across batches.
        num_epochs: number of passes over the corpus.
        num_steps: time steps per training example.
        lr: learning rate passed to ``d2l.sgd``.
        clipping_theta: gradient-norm threshold passed to ``grad_clipping``.
        batch_size: examples per mini-batch.
        pred_period: print perplexity and samples every this many epochs.
        pred_len: number of characters to generate per sample.
        prefixes: iterable of prefix strings to generate samples from.
    '''
    data_iter_fn = (d2l.data_iter_random if is_random_iter
                    else d2l.data_iter_consecutive)
    params = get_params()
    loss = gloss.SoftmaxCrossEntropyLoss()

    for epoch in range(num_epochs):
        if not is_random_iter:
            # Consecutive sampling: initialise the state once per epoch.
            state = init_rnn_state(batch_size, num_hiddens, ctx)
        l_sum, n, start = 0.0, 0, time.time()
        # Use the `corpus` argument, not whatever global happens to be defined.
        data_iter = data_iter_fn(corpus, batch_size, num_steps, ctx)
        for X, Y in data_iter:
            if is_random_iter:
                # Random sampling: batches are unrelated, so start from zeros.
                state = init_rnn_state(batch_size, num_hiddens, ctx)
            else:
                # NDArray.detach() returns a new array rather than modifying
                # in place, so the result must be kept for the state to be
                # cut off from the previous batch's computation graph.
                state = tuple(s.detach() for s in state)
            with autograd.record():
                inputs = to_onehot(X, vocab_size)
                (outputs, state) = rnn(inputs, state, params)
                # Stack the per-step outputs along the first axis ...
                outputs = nd.concat(*outputs, dim=0)
                # ... and flatten the transposed labels so they line up with them.
                y = Y.T.reshape((-1,))
                l = loss(outputs, y).mean()
            l.backward()
            grad_clipping(params, clipping_theta, ctx)
            # The loss is already a mean, so 1 is passed as the last argument
            # (presumably the batch-size divisor -- confirm against d2l.sgd).
            d2l.sgd(params, lr, 1)
            l_sum += l.asscalar() * y.size
            n += y.size

        if (epoch + 1) % pred_period == 0:
            print('epoch %d,perplexity %f,time %.2f sec' %
                  (epoch + 1, math.exp(l_sum / n), time.time() - start))
            for prefix in prefixes:
                print(' -', predict_rnn(prefix, pred_len, rnn, params,
                                        init_rnn_state, num_hiddens, vocab_size, ctx,
                                        idx_to_char, char_to_idx))

In [41]:
# Toy batch for a shape check: the values 0..9 arranged as 2 rows x 5 columns,
# then one-hot encoded column by column.
X = nd.arange(10).reshape((2,5))
inputs = to_onehot(X, vocab_size)

In [42]:
# Device context used by the rest of the notebook; get_params() reads this
# global. Presumably a GPU when one is available, else CPU -- confirm in d2l.
ctx = d2l.try_gpu()

In [43]:
# Zero initial hidden state with one row per row of the toy batch X.
state = init_rnn_state(X.shape[0], num_hiddens, ctx)

In [45]:
# Re-encode the toy batch after moving it to ctx, the context on which
# get_params() creates the parameters.
inputs = to_onehot(X.as_in_context(ctx), vocab_size)

In [46]:
# Freshly initialised (untrained) parameters for the shape check and the
# sample prediction below.
params = get_params()

In [50]:
# One forward pass over the toy batch with the untrained parameters.
outputs, state_new = rnn(inputs, state, params)

In [51]:
# Sanity check: number of per-step outputs, shape of the first output, and
# shape of the returned hidden state.
print(len(outputs), outputs[0].shape, state_new[0].shape)

5 (2, 1068) (2, 256)


In [103]:
# NOTE(review): development-time IPython source lookup; it has no effect on
# the analysis and should be removed before the notebook is shared.
??d2l.data_iter_consecutive

In [106]:
# Generate 10 characters from the prefix using the current `params`, to
# exercise predict_rnn end to end.
print(predict_rnn('笑顔', 10, rnn, params, init_rnn_state, num_hiddens,
                  vocab_size, ctx, idx_to_char, char_to_idx))

笑顔使熟ワ雪溢腹銀口儚覚


In [64]:
# Training hyper-parameters. These replace the earlier provisional
# batch_size / num_steps values.
num_epochs = 400
num_steps = 35
batch_size = 32
lr = 1e2              # learning rate passed to the SGD update
clipping_theta = 1e-2  # upper bound on the global gradient norm

In [66]:
# Sampling settings used while training.
pred_period = 50     # report every 50 epochs
pred_len = 50        # characters generated per sample
prefixes = ['笑顔']   # seed strings for the generated samples

In [67]:
# Train with random sampling (is_random_iter=True) and print the perplexity
# plus one generated sample per prefix every `pred_period` epochs.
train_and_predict_rnn(rnn, get_params, init_rnn_state, num_hiddens,
                      vocab_size, ctx, corpus_indices, idx_to_char,
                      char_to_idx, True, num_epochs, num_steps, lr,
                      clipping_theta, batch_size, pred_period, pred_len, prefixes)

epoch 50,perplexity 14.293405,time 5.98 sec
 - 笑顔ではいだ あの娘を染めて広がる 動けない場所からいつかない まだ何ものいだ あの娘を くれたち来て 
epoch 100,perplexity 4.853905,time 6.47 sec
 - 笑顔のよう 君が覚の海 愛を歌ぶ 君の中を染めて広がる ただ地獄を進む者が悲しい記憶に勝つ 作り物だ世界
epoch 150,perplexity 3.123811,time 6.26 sec
 - 笑顔では 次のあなた ここで会いだから楽しい 誰かにないなるんで いる もんだならしいな僕らし するだら
epoch 200,perplexity 2.432333,time 6.15 sec
 - 笑顔では 次のあなたへの 橋になりますように 遠い場所も繋がっているよ 外の話を聞くさんならう 生まる胸
epoch 250,perplexity 2.061410,time 6.34 sec
 - 笑顔では どんな時ものれなには 心のあふれぬまで 輝くしゃもの水さらぬ 病だあるうべり捨いだけ 愛をはじ
epoch 300,perplexity 1.815772,time 6.21 sec
 - 笑顔では いつもの他倍を 歌アス濡るここのこのだら どうかの中でしいえも ああながらいつものう 何かには
epoch 350,perplexity 1.671480,time 6.11 sec
 - 笑顔ではいだなささ もんはさえないけど いつもでもないて イ跡われたまま 世界鳴らす言うな顔が うだろう
epoch 400,perplexity 1.567706,time 5.98 sec
 - 笑顔できただな 空に変わるけべて 光が辿う すかいなには笑うなさ もの丈ない指のこにあるものいつもの家倍


In [61]:
# Character count of a sample line (presumably text generated by an earlier
# run -- it does not appear in the outputs kept in this notebook).
len('みんなが瞳にあるから したもの空を 日々のわみためは 次の口かを照らすんだ 命は続く日々のゲームは続く 君')

53