# Recurrent Neural Network (RNN)

## Import dependencies

In [1]:
import numpy as np

## Load dataset

In [6]:
# Read the whole training corpus into memory. The context manager closes the
# file handle deterministically, and the explicit encoding keeps decoding
# independent of the platform's locale default (the corpus contains many
# non-ASCII characters).
with open('../datasets/wikitext-2-raw/wiki.train.raw', 'r', encoding='utf-8') as corpus_file:
    data = corpus_file.read()

# Vocabulary: every distinct character in the corpus, in sorted order.
chars = sorted(set(data))

In [8]:
# Corpus length in characters, and the number of distinct characters in it.
data_size, char_size = len(data), len(chars)
print('Data size = {:,}\nChar size = {:,}'.format(data_size, char_size))

Data size = 10,918,892
Char size = 1,013


### Characters to index and vice versa

In [10]:
# Lookup tables in both directions between a character and its integer id.
# An id is simply the character's position inside `chars`.
char_2_idx = dict(zip(chars, range(len(chars))))
idx_2_char = dict(enumerate(chars))
print('c2i = {:,}\ni2c = {:,}'.format(len(char_2_idx), len(idx_2_char)))

c2i = 1,013
i2c = 1,013


### Testing vectorization

In [16]:
# Sanity check: build the one-hot vector for the character 'a'.
idx_4_a = char_2_idx['a']
vector_a = np.zeros(char_size)  # all zeros, one slot per vocabulary entry
vector_a[idx_4_a] = 1           # switch on the slot that belongs to 'a'
print(vector_a)

[ 0.  0.  0. ...,  0.  0.  0.]


## Define Hyperparameters

In [20]:
# --- Training settings ---
hidden_size = 100     # number of units in the hidden state
seq_length = 25       # presumably characters per training sequence -- confirm against the training loop
learning_rate = 1e-1
max_iter = 1000       # presumably the number of training iterations -- confirm against the training loop
log_step = 100        # presumably the logging interval in iterations -- confirm against the training loop

# --- Model parameters ---
# Weights start as small Gaussian noise; biases start at zero.
Wxh = 0.01 * np.random.randn(hidden_size, char_size)    # input  -> hidden
Whh = 0.01 * np.random.randn(hidden_size, hidden_size)  # hidden -> hidden
Why = 0.01 * np.random.randn(char_size, hidden_size)    # hidden -> output
bh = np.zeros((hidden_size, 1))  # hidden bias
by = np.zeros((char_size, 1))    # output bias

## Building the Network

In [None]:
def network(inputs, labels, prev_hidden):
    """
    Run one forward and backward pass of the character-level RNN.

    Reads the module-level parameters ``Wxh``, ``Whh``, ``Why``, ``bh``,
    ``by`` and the vocabulary size ``char_size``; none of them is modified.

    :param inputs:
            sequence of integer character indices, one per time step
            (each index is one-hot encoded internally)
    :param labels:
            sequence of integer indices of the target (next) characters,
            aligned with ``inputs``
    :param prev_hidden:
            hidden state at the previous time step, a column vector with
            the same shape as ``bh``
    :return:
            list ``[loss, dWxh, dWhh, dWhy, dbh, dby, last_hidden]`` where
            ``loss`` is the summed negative log likelihood, the ``d*``
            entries are gradients clipped to ``[-5, 5]``, and
            ``last_hidden`` is the hidden state after the final time step
            (a copy of ``prev_hidden`` when ``inputs`` is empty)
    """
    xs, hs, ys, ps = {}, {}, {}, {}
    # !- Copying the hidden layer at previous time step
    hs[-1] = np.copy(prev_hidden)
    # !- Initialize loss to 0
    loss = 0

    # !- Forward pass
    for t in range(len(inputs)):
        xs[t] = np.zeros(shape=[char_size, 1])
        xs[t][inputs[t]] = 1  # one-hot input @ current time step
        hs[t] = np.tanh(np.dot(Wxh, xs[t]) + np.dot(Whh, hs[t-1]) + bh)  # hidden state @ current time step
        ys[t] = np.dot(Why, hs[t]) + by  # un-normalized log probabilities
        # Softmax; subtracting the max leaves the result unchanged
        # mathematically but keeps np.exp from overflowing on large scores.
        exp_scores = np.exp(ys[t] - np.max(ys[t]))
        ps[t] = exp_scores / np.sum(exp_scores)  # normalized probability
        loss += -np.log(ps[t][labels[t], 0])  # -ve log likelihood

    # !- Backward pass
    dWxh, dWhh, dWhy = np.zeros_like(Wxh), np.zeros_like(Whh), np.zeros_like(Why)
    dbh, dby = np.zeros_like(bh), np.zeros_like(by)
    # Gradient flowing back from the following time step; zero beyond the
    # end of the sequence. Same shape as the hidden state / hidden bias.
    dh_next = np.zeros_like(bh)
    for t in reversed(range(len(inputs))):
        dy = np.copy(ps[t])  # copying over the output probabilities
        # softmax + cross-entropy gradient: probabilities minus the one-hot target
        dy[labels[t]] -= 1
        # hidden to output derivative = output derivative * hidden state's transpose
        dWhy += np.dot(dy, hs[t].T)
        dby += dy  # output bias derivative = output derivative
        # !- Back propagation into the hidden state
        dh = np.dot(Why.T, dy) + dh_next
        dh_raw = (1 - hs[t] * hs[t]) * dh  # through tanh: d/dz tanh(z) = 1 - tanh(z)^2
        dWhh += np.dot(dh_raw, hs[t-1].T)
        dbh += dh_raw
        dWxh += np.dot(dh_raw, xs[t].T)
        dh_next = np.dot(Whh.T, dh_raw)
    # Clip the gradients (never the parameters themselves) in place to limit
    # the effect of exploding gradients.
    for d_param in [dWxh, dWhh, dWhy, dbh, dby]:
        np.clip(d_param, -5, 5, out=d_param)
    return [loss, dWxh, dWhh, dWhy, dbh, dby, hs[len(inputs)-1]]

### Helper function for generating next character

In [None]:
def generate_char():
    """Placeholder for the next-character generation helper; not implemented yet."""
    return None