# RNN exercises


目标：

1. Character Language Model,  Basic RNN
    1. word representation
    2. character language model
    3. sample
    4. train
    5. summary
2. Writing like Shakespeare, LSTM RNN
    1. Model
    2. Train
    3. Generate


## 1 取名字

给定一个name列表（训练集），训练RNN模型，得到一个 Character Language Model（概率模型）。 然后通过这个Model Sample出一些name。

In [7]:
import numpy as np
import random


# Dataset: the full text of dinos.txt, lower-cased.
# The context manager closes the file handle that a bare open().read() leaks.
with open('dinos.txt', 'r') as f:
    data = f.read().lower()

# Unique characters, sorted once so both lookup tables share the same ordering.
chars = sorted(set(data))
data_size, vocab_size = len(data), len(chars)

# Character <-> index lookup tables.
char_to_ix = {ch: i for i, ch in enumerate(chars)}
ix_to_char = {i: ch for i, ch in enumerate(chars)}
print('There are %d total characters and %d unique characters in your data.' % (data_size, vocab_size))
print(ix_to_char)

There are 19909 total characters and 27 unique characters in your data.
{0: '\n', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


    
<img src="images/rnn.png" style="width:450px;height:300px;">
<caption><center> **Figure 1**: Recurrent Neural Network, similar to what you had built in the previous notebook "Building a RNN - Step by Step".  </center></caption>



<img src="images/dinos3.png" style="width:500px;height:300px;">
<caption><center> **Figure 2**: In this picture, we assume the model is already trained. We pass in $x^{\langle 1\rangle} = \vec{0}$ at the first time step, and have the network then sample one character at a time. </center></caption>

In [48]:
def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating, which
    keeps the exponentials from overflowing without changing the result.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=0)

In [178]:
class RnnCell(object):
    """A single time step of a tanh RNN with a softmax output layer.

    The cell keeps the values seen during the forward pass so the backward
    pass can reuse them. ``dy`` (the gradient of the loss with respect to the
    pre-softmax output) is not computed here; the caller must assign it
    before calling ``backward_pass``.
    """

    def __init__(self, parameters):
        # Dict holding the arrays 'Wax', 'Waa', 'ba', 'Wya' and 'by'.
        self.parameters = parameters
        # NOTE(review): attribute name is misspelled ("graidents"). Nothing in
        # this class reads it; it is kept unchanged to preserve instance state.
        self.graidents = {}
        # Forward-pass cache: input, incoming hidden state, output
        # distribution, outgoing hidden state, and the output gradient.
        self.x = self.ai = self.y = self.ao = self.dy = None

    def forward_pass(self, x, ai):
        """Run one forward step and cache its inputs and outputs.

        Arguments:
        x -- input for this time step
        ai -- hidden state coming from the previous time step

        Returns:
        The new hidden state ``tanh(Wax.x + Waa.ai + ba)``; the softmax
        output is stored in ``self.y``.
        """
        p = self.parameters
        w_in, w_rec, b_hid = p['Wax'], p['Waa'], p['ba']
        w_out, b_out = p['Wya'], p['by']

        self.x, self.ai = x, ai
        pre_activation = np.dot(w_in, x) + np.dot(w_rec, ai) + b_hid
        self.ao = np.tanh(pre_activation)
        logits = np.dot(w_out, self.ao) + b_out
        self.y = softmax(logits)
        return self.ao

    def backward_pass(self, dao):
        """Back-propagate through this step using the cached forward values.

        Arguments:
        dao -- gradient flowing into this step's hidden state from the
               following time step

        Returns:
        gradients -- dict with keys 'Wax', 'Waa', 'ba', 'Wya', 'by'
        dai -- gradient with respect to the incoming hidden state
        """
        p = self.parameters
        # 'Wax' is looked up (though unused) so a missing key fails the same way.
        _, w_rec, w_out = p['Wax'], p['Waa'], p['Wya']
        d_out = self.dy

        # Hidden-state gradient: from the next step plus from this step's output.
        d_hidden = dao + np.dot(w_out.T, d_out)
        # a = tanh(z)  =>  dz = da * (1 - a * a)
        d_pre = d_hidden * (1 - self.ao * self.ao)

        gradients = {
            'Wax': np.dot(d_pre, self.x.T),
            'Waa': np.dot(d_pre, self.ai.T),
            'ba': d_pre,
            'Wya': np.dot(d_out, self.ao.T),
            'by': d_out,
        }
        return gradients, np.dot(w_rec.T, d_pre)


class CharacterLanguageModel():
    """Character-level language model built from a chain of ``RnnCell`` steps.

    Parameters live in ``self.parameters`` under the keys 'Wax', 'Waa',
    'Wya', 'ba' and 'by'. Input and output sizes are both the vocabulary
    size; ``n_a`` is the hidden-state size.
    """

    def __init__(self, n_a=50, vocab_size=27):
        self.n_x = vocab_size
        self.n_y = vocab_size   # T_y = T_x, n_y = n_x
        self.n_a = n_a
        self.initialize_parameters(n_a, vocab_size, vocab_size)

    def initialize_parameters(self, n_a, n_x, n_y, seed=1):
        """Create small random weights and zero biases.

        Re-seeds NumPy's global random generator with ``seed`` so the
        initial weights are reproducible.
        """
        np.random.seed(seed)
        Wax = np.random.randn(n_a, n_x) * 0.01  # input to hidden
        Waa = np.random.randn(n_a, n_a) * 0.01  # hidden to hidden
        Wya = np.random.randn(n_y, n_a) * 0.01  # hidden to output
        b = np.zeros((n_a, 1))   # hidden bias
        by = np.zeros((n_y, 1))  # output bias

        self.parameters = {"Wax": Wax, "Waa": Waa, "Wya": Wya, "ba": b, "by": by}

    def run_forward(self, X, a0):
        """Run the forward pass over one sequence.

        Arguments:
        X -- list of character indices. At step t the input is the one-hot
             of X[t-1] (a zero vector at t=0) and the target is X[t].
        a0 -- initial hidden state; it is copied, not modified

        Returns:
        loss -- sum over time of -log(probability assigned to the target)
        cells -- one RnnCell per time step, each with ``dy`` already set to
                 (softmax output - one-hot target)
        """
        loss = 0.0
        cells = []

        x = np.zeros((self.n_x, 1))
        ai = np.copy(a0)
        for target in X:
            cell = RnnCell(self.parameters)
            ai = cell.forward_pass(x, ai)

            # Cross-entropy loss and its gradient w.r.t. the softmax input.
            loss -= np.log(cell.y[target, 0])
            cell.dy = cell.y.copy()
            cell.dy[target] -= 1

            cells.append(cell)
            # The current target becomes the next step's one-hot input.
            x = np.zeros((self.n_x, 1))
            x[target] = 1

        return loss, cells

    def run_backwards(self, cells):
        """Back-propagate through time and sum the per-step gradients.

        Arguments:
        cells -- the list returned by ``run_forward``

        Returns:
        Dict of accumulated gradients, keyed like ``self.parameters``.
        """
        dao = np.zeros((self.n_a, 1))
        gradients = {k: np.zeros_like(v) for k, v in self.parameters.items()}
        for cell in reversed(cells):
            grad, dao = cell.backward_pass(dao)
            for k in grad:
                gradients[k] += grad[k]
        return gradients

    def update_parameters(self, gradients, learning_rate):
        """Clip the gradients to [-1, 1] and take one gradient-descent step.

        Both the arrays in ``gradients`` and the parameter arrays are
        modified in place.
        """
        for gradient in gradients.values():
            np.clip(gradient, -1, 1, out=gradient)

        for k in self.parameters:
            self.parameters[k] += -learning_rate * gradients[k]

    def optimize(self, X, a0, learning_rate=0.01):
        """Run one forward/backward/update step on a single sequence.

        Arguments:
        X -- list of character indices
        a0 -- initial hidden state
        learning_rate -- step size for the parameter update

        Returns:
        loss -- loss of the sequence before the update
        gradients -- the (clipped) gradients that were applied
        a_last -- hidden state after the last time step
        """
        loss, cells = self.run_forward(X, a0)
        gradients = self.run_backwards(cells)
        self.update_parameters(gradients, learning_rate)
        return loss, gradients, cells[-1].ao

    def train(self, data, ix_to_char, char_to_ix, num_iterations=35000, learning_rate=0.01):
        """Train on a list of lines, one line per iteration.

        Arguments:
        data -- list of lines (e.g. from f.readlines()); shuffled IN PLACE
                after seeding NumPy's global generator with 0
        ix_to_char -- index -> character mapping, used to print samples
        char_to_ix -- character -> index mapping
        num_iterations -- number of single-line optimisation steps
        learning_rate -- step size passed to every ``optimize`` call

        Returns:
        The loss of the last processed line, or None when
        ``num_iterations`` is 0.

        NOTE(review): ``get_initial_loss``, ``smooth`` and ``print_sample``
        are not defined in this class; they must be in scope at call time
        (presumably course utility helpers -- confirm).
        """
        np.random.seed(0)
        np.random.shuffle(data)

        loss = get_initial_loss(self.n_x, 7)
        curr_loss = None  # stays None if the loop body never runs
        # The hidden state left by one line seeds the next line's a0.
        a0 = np.zeros((self.n_a, 1))
        for j in range(num_iterations):
            index = j % len(data)
            X = [char_to_ix[ch] for ch in data[index]]

            # Honour the caller's learning rate (previously hard-coded to 0.01).
            curr_loss, gradients, a0 = self.optimize(X, a0, learning_rate=learning_rate)
            loss = smooth(loss, curr_loss)
            if j % 2000 == 0:
                print('Iteration: %d, Loss: %f' % (j, loss) + '\n')
                seed = 0
                for _ in range(7):
                    # Sample indices and print them
                    sampled_indices = self.sample(char_to_ix['\n'], seed=seed)
                    print_sample(sampled_indices, ix_to_char)
                    seed += 1  # a different seed per sample keeps the printout reproducible
                print('\n')
        return curr_loss

    def sample(self, eos=0, max_len=50, seed=1):
        """Sample a sequence of character indices from the model.

        Arguments:
        eos -- index that ends the sequence once it is sampled
        max_len -- maximum number of sampled characters
        seed -- base seed; NumPy's global generator is re-seeded with
                ``counter + seed`` before every draw, and both values grow
                by one per step

        Returns:
        List of sampled indices. It ends with ``eos``, which is appended
        explicitly when ``max_len`` was reached.
        """
        Wax, Waa, ba, Wya, by = [self.parameters[k] for k in ['Wax', 'Waa', 'ba', 'Wya', 'by']]
        n_x, n_a = self.n_x, self.n_a

        indices = []
        idx = -1
        counter = 0

        # 1. Start from a zero input and a zero hidden state.
        x = np.zeros((n_x, 1))
        ai = np.zeros((n_a, 1))
        while (idx != eos and counter != max_len):
            # 2. One forward step.
            a = np.tanh(np.dot(Waa, ai) + np.dot(Wax, x) + ba)
            y = softmax(np.dot(Wya, a) + by)

            # 3. Sample a character index from the output distribution y.
            np.random.seed(counter + seed)
            idx = np.random.choice(n_x, p=y.ravel())
            indices.append(idx)

            # 4. The sampled character is the next input, one-hot encoded.
            #    Sized by the model's own n_x, not the module-global vocab_size.
            x = np.zeros((n_x, 1))
            x[idx] = 1

            seed += 1
            counter += 1
            ai = a

        if (counter == max_len):
            indices.append(eos)
        return indices
        


In [179]:
# Read the names file line by line, lower-casing each line as it is read.
with open('dinos.txt', 'r') as f:
    data = [line.lower() for line in f]

print(len(data))
print(data[0])

# Build a model with the default sizes and train it on the shuffled lines.
model = CharacterLanguageModel()
model.train(data, ix_to_char, char_to_ix, num_iterations=20000)

1536
aachenosaurus

Iteration: 0, Loss: 23.087336

Nkzxwtdmfqoeyhsqwasjkjvu
Kneb
Kzxwtdmfqoeyhsqwasjkjvu
Neb
Zxwtdmfqoeyhsqwasjkjvu
Eb
Xwtdmfqoeyhsqwasjkjvu


Iteration: 2000, Loss: 27.950466

Mhytrpdmeromxgortariontoclusuonancesatlasalepdtonp
Hledalpsamantisaurus
Iwtrpdlerndxhortarinesganusvkecielulen
Macalpsamantisaurus
Ytrpckgoraurus
A
Troligoraurus


Iteration: 4000, Loss: 25.894695

Onytos
Kledalosaurus
Lytosaurus
Oma
Xtrolonmkaveros
Cabasemachus
Toraraurus


Iteration: 6000, Loss: 24.697856

Rixtosaurus
Nneeaitos
Nytosaurus
Racalosaurus
Xtroionosaurus
Gaalosaurus
Troionosaurus


Iteration: 8000, Loss: 24.231165

Phyusodon
Lomaaerond
Mytrodon
Pgaagptok
Yussangosaurus
Gaaerrdcaptosaurus
Trodomor


Iteration: 10000, Loss: 24.000355

Onyusaurus
Mica
Myusodon
Ola
Yuspandosaurus
Fa
Trocheoraxaurucoonatocesaurus


Iteration: 12000, Loss: 23.363365

Phytosaurus
Miecanosaurus
Myuspenatoptor
Pedahosaurus
Yvqsator
Gaaerosaurus
Ustanesaurus


Iteration: 14000, Loss: 23.307418

Onyushaphophyl

19.659786898248683



需要注意的地方：

1. 每次迭代（optimize）之后，输出的最后一个a， 作为下一次optimize的 a0。 循环使用。 这个之前没发现。不知道原因？？

2. 与原版本相比， 在迭代 400多次之后， loss就有一点点偏差了。 不知道哪里计算开始出现偏差。不过好在比原版本的loss小一点。输出也是有意义的。

设计思想：

RNN cell 会存储一些状态，使用一些参数。 每次 FP的时候，更新一些状态，计算BP的时候，会用到这些状态，然后更新参数。
训练的时候，生成一串cell， 计算FP和BP。

Loja！！

## 2 写诗，莎士比亚

 

In [185]:
from __future__ import print_function
from keras.callbacks import LambdaCallback
from keras.models import Model, load_model, Sequential
from keras.layers import Dense, Activation, Dropout, Input, Masking
from keras.layers import LSTM
from keras.utils.data_utils import get_file
from keras.preprocessing.sequence import pad_sequences
import sys
import io

print("Loading text data...")
# The context manager closes the file handle that a bare io.open().read() leaks.
with io.open('shakespeare.txt', encoding='utf-8') as corpus_file:
    text = corpus_file.read().lower()
print('corpus length:', len(text))

# Tx: number of characters in one training window.
Tx = 40
# Sorted vocabulary and the two lookup tables derived from it.
chars = sorted(set(text))
char_indices = {c: i for i, c in enumerate(chars)}
indices_char = {i: c for i, c in enumerate(chars)}
print('number of unique characters in the corpus:', len(chars))
print(indices_char)

Loading text data...
corpus length: 94275
number of unique characters in the corpus: 38
{0: '\n', 1: ' ', 2: '!', 3: "'", 4: '(', 5: ')', 6: ',', 7: '-', 8: '.', 9: ':', 10: ';', 11: '?', 12: 'a', 13: 'b', 14: 'c', 15: 'd', 16: 'e', 17: 'f', 18: 'g', 19: 'h', 20: 'i', 21: 'j', 22: 'k', 23: 'l', 24: 'm', 25: 'n', 26: 'o', 27: 'p', 28: 'q', 29: 'r', 30: 's', 31: 't', 32: 'u', 33: 'v', 34: 'w', 35: 'x', 36: 'y', 37: 'z'}


In [188]:

def build_data(text, Tx = 40, stride = 3):
    """
    Build a training set by sliding a window of length Tx over the corpus.

    Arguments:
    text -- string, the corpus to scan
    Tx -- sequence length, number of characters in one training example
    stride -- number of positions the window advances between examples

    Returns:
    X -- list of training examples (substrings of length Tx)
    Y -- list of training labels (the character right after each window)

    Also prints the number of examples produced.
    """
    window_starts = range(0, len(text) - Tx, stride)
    X = [text[start: start + Tx] for start in window_starts]
    Y = [text[start + Tx] for start in window_starts]
    print('number of training examples:', len(X))
    return X, Y


def vectorization(X, Y, n_x, char_indices, Tx = 40):
    """
    One-hot encode X and Y (lists) into boolean arrays for a recurrent network.

    Arguments:
    X -- list of training examples; each is a sequence of at most Tx characters
    Y -- list of training labels, one character per example
    n_x -- vocabulary size (length of the one-hot axis)
    char_indices -- dict mapping each character to its index in [0, n_x)
    Tx -- integer, sequence length

    Returns:
    x -- boolean array of shape (m, Tx, n_x)
    y -- boolean array of shape (m, n_x)

    Raises:
    KeyError -- if a character is missing from char_indices
    IndexError -- if an example is longer than Tx
    """
    m = len(X)
    # The builtin ``bool`` is used because the ``np.bool`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    x = np.zeros((m, Tx, n_x), dtype=bool)
    y = np.zeros((m, n_x), dtype=bool)
    for i, sentence in enumerate(X):
        for t, char in enumerate(sentence):
            x[i, t, char_indices[char]] = True
        y[i, char_indices[Y[i]]] = True

    return x, y

# Cut the corpus into overlapping Tx-character windows, each paired with the
# character that follows it.
print("Creating training set...")
X, Y = build_data(text, Tx=Tx, stride=3)

# One-hot encode the windows and labels over the corpus vocabulary.
print("Vectorizing training set...")
x, y = vectorization(X, Y, len(chars), char_indices)

Creating training set...
number of training examples: 31412
Vectorizing training set...


In [187]:
# Load a saved Keras model from disk. This rebinds the module-level name
# `model`, which earlier in the file referred to the CharacterLanguageModel.
# NOTE(review): the file name suggests 350 epochs of prior training -- not verifiable here.
print("Loading model...")
model = load_model('models/model_shakespeare_kiank_350_epoch.h5')

Loading model...


In [189]:
# Fit the loaded model on the one-hot arrays x / y for one epoch,
# in batches of 128 and with no callbacks.
model.fit(x, y, batch_size=128, epochs=1, callbacks=[])

Epoch 1/1


<keras.callbacks.History at 0x11db2da90>

In [203]:
def sample(preds, temperature=1.0):
    """Sample an index from a probability array after temperature re-scaling.

    Arguments:
    preds -- array-like of probabilities, one per class
    temperature -- divisor applied to the log-probabilities before they are
                   re-normalised; 1.0 leaves the distribution unchanged

    Returns:
    The sampled class index, drawn with NumPy's global random generator.
    """
    preds = np.asarray(preds).astype('float64')
    # Re-scale in log space, then re-normalise (softmax).
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    # A single multinomial draw yields a one-hot row, e.g. [0 ... 1 ... 0].
    probas = np.random.multinomial(1, preds, 1)
    # The position of the 1 is the sample. Reading it directly avoids a
    # second, redundant random draw and the dependence on the global `chars`.
    return np.argmax(probas)
    
def generate_output():
    """Prompt for the start of a poem and stream 400 generated characters.

    Reads one line from standard input, writes the prompt followed by the
    generated continuation to standard output, and returns None. Relies on
    the module-level ``model``, ``Tx``, ``chars``, ``char_indices``,
    ``indices_char`` and ``sample``.
    """
    usr_input = input("Write the beginning of your poem, the Shakespeare machine will complete it. Your input is: ")
    # Left-pad with '0' up to Tx characters and keep only the last Tx, so a
    # prompt longer than the window no longer overflows x_pred.
    sentence = usr_input.lower().rjust(Tx, '0')[-Tx:]

    sys.stdout.write("\n\nHere is your poem: \n\n")
    sys.stdout.write(usr_input)

    n_chars = len(chars)
    for _ in range(400):
        x_pred = np.zeros((1, Tx, n_chars))

        for t, char in enumerate(sentence):
            if char == '0':
                # Padding marker: leave this time step as an all-zero vector.
                continue
            index = char_indices.get(char)
            # Characters outside the vocabulary are treated like padding
            # instead of raising KeyError.
            if index is not None:
                x_pred[0, t, index] = 1.

        preds = model.predict(x_pred, verbose=0)[0]
        next_index = sample(preds, temperature = 1.0)
        next_char = indices_char[next_index]

        # Slide the window: drop the oldest character, append the new one.
        sentence = sentence[1:] + next_char

        sys.stdout.write(next_char)
        sys.stdout.flush()

In [204]:
# Run this cell to try with different inputs without having to re-train the model 
# generate_output() prompts on standard input and streams the generated text to standard output.
generate_output()

Write the beginning of your poem, the Shakespeare machine will complete it. Your input is: hello


Here is your poem: 

hellowe:
thint no nhe cow let yet net thus widl cenn as ofe,
the bibtt of my knew mate batuted his growl rier,
which persed wimn dopr tom, thy wasterfoe,
to hil crothes urufnes the riming stet dannter:
what hin gray who braws a dacm tot con  now
thought is fides vieture my hell yeur o thee.

so thon that i pun gold thy mate woed one,
whins the lich thy happlint who thees bid,
thy sich basth hir sides b


总结一下：

用的LSTM 模型， 还是 Character Level Model。 用若干字符预测下一个字符。  训练的时候用40个字符预测下一个字符。

比较神奇的是， 除了模型，没有给出其他规则，比如换行、','后面基本上都是换行。 学习的时候自己就会了。

输出的玩意也不知所云。