# Recurrent Neural Network

In [1]:
import numpy as np

In [2]:
def softmax(x):
    """Return the softmax of ``x``, normalised along axis 0.

    The global maximum of ``x`` is subtracted before exponentiating so that
    the largest exponent is 0; this keeps ``np.exp`` from overflowing and does
    not change the result, because the shift cancels in the ratio.
    """
    shifted = x - np.max(x)
    exps = np.exp(shifted)
    # Each column (or the whole vector, for 1-D input) sums to 1.
    totals = exps.sum(axis=0)
    return exps / totals

def sigmoid(x):
    """Return the element-wise logistic function ``1 / (1 + e**(-x))``."""
    decay = np.exp(-x)
    return 1 / (1 + decay)

In [3]:
def init_adam(params):
    """Create zero-initialised Adam moment estimates for every layer.

    Args:
        params: dict holding one ``"W<l>"`` and one ``"b<l>"`` array per
            layer, with layers numbered from 1 (``"W1"``, ``"b1"``, ...).

    Returns:
        A pair ``(v, s)`` of dicts keyed ``"dW<l>"`` / ``"db<l>"``. Every
        entry is a fresh zero array with the shape of the matching parameter;
        ``v`` is for the first-moment estimates and ``s`` for the
        second-moment estimates.

    Raises:
        KeyError: if a ``"W<l>"`` or ``"b<l>"`` entry is missing for some
            layer in ``1 .. len(params) // 2``.
    """
    # Each layer contributes two entries (W and b), so the layer count is half
    # the dict size. Looping over len(params) would ask for layers that do not
    # exist and fail with a KeyError.
    num_layers = len(params) // 2
    v = {}
    s = {}

    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            shape = params[prefix + str(layer)].shape
            grad_key = "d" + prefix + str(layer)
            # Separate arrays for v and s so updating one never aliases the other.
            v[grad_key] = np.zeros(shape)
            s[grad_key] = np.zeros(shape)

    return v, s

In [4]:
def update_params_adam(params, grads, v, s, t, lr=0.01, beta1=0.9, beta2=0.999,
                      epsilon=1e-8):
    """Apply one Adam optimisation step to every layer's weights and biases.

    For each parameter ``p`` with gradient ``g`` the step is::

        v = beta1 * v + (1 - beta1) * g          # first moment
        s = beta2 * s + (1 - beta2) * g**2       # second moment
        v_hat = v / (1 - beta1**t)               # bias correction
        s_hat = s / (1 - beta2**t)
        p = p - lr * v_hat / (sqrt(s_hat) + epsilon)

    Args:
        params: dict with ``"W<l>"`` / ``"b<l>"`` arrays, layers numbered
            from 1. Updated in place and also returned.
        grads: dict with the matching ``"dW<l>"`` / ``"db<l>"`` gradients.
        v: first-moment estimates keyed like ``grads``. Updated in place.
        s: second-moment estimates keyed like ``grads``. Updated in place.
        t: 1-based step counter used for bias correction. ``t = 0`` makes the
            correction denominators zero.
        lr: learning rate.
        beta1: exponential decay rate for the first moment.
        beta2: exponential decay rate for the second moment.
        epsilon: small constant added to the denominator to avoid division
            by zero.

    Returns:
        The tuple ``(params, v, s)`` (the same dict objects that were passed
        in, after the update).
    """
    # Two entries (W and b) per layer, so the layer count is half the dict size.
    num_layers = len(params) // 2

    # These factors depend only on t, not on the layer: hoist them.
    v_correction = 1 - beta1 ** t
    s_correction = 1 - beta2 ** t

    for layer in range(1, num_layers + 1):
        for prefix in ("W", "b"):
            param_key = prefix + str(layer)
            grad_key = "d" + param_key
            grad = grads[grad_key]

            # Each moment is updated from its OWN previous value (weights and
            # biases keep separate running averages).
            v[grad_key] = beta1 * v[grad_key] + (1 - beta1) * grad
            # The second moment tracks the squared gradient, which also keeps
            # it non-negative so the square root below is well defined.
            s[grad_key] = beta2 * s[grad_key] + (1 - beta2) * grad ** 2

            v_hat = v[grad_key] / v_correction
            s_hat = s[grad_key] / s_correction

            # Epsilon is added outside the square root, as in the Adam paper.
            params[param_key] = params[param_key] - lr * v_hat / (np.sqrt(s_hat) + epsilon)

    return params, v, s

## Main code

In [5]:
def rnn_cell_forward(xt, a_prev, params):
    """Run one RNN time step: compute the next hidden state and the output.

    Args:
        xt: input for the current time step.
        a_prev: hidden state from the previous time step.
        params: dict providing ``"Wax"``, ``"Waa"``, ``"Wya"``, ``"ba"`` and
            ``"by"``.

    Returns:
        ``(a_next, yt_pred, cache)`` where ``a_next`` is
        ``tanh(Wax @ xt + Waa @ a_prev + ba)``, ``yt_pred`` is
        ``softmax(Wya @ a_next + by)`` and ``cache`` is the tuple
        ``(a_next, a_prev, xt, params)`` (presumably kept for a backward
        pass -- not visible in this part of the file).
    """
    w_input, w_hidden, w_output, b_hidden, b_output = (
        params[name] for name in ("Wax", "Waa", "Wya", "ba", "by")
    )

    # Hidden state: combine the current input with the previous state.
    pre_activation = np.dot(w_input, xt) + np.dot(w_hidden, a_prev) + b_hidden
    a_next = np.tanh(pre_activation)

    # Output: project the new hidden state and normalise with softmax.
    logits = np.dot(w_output, a_next) + b_output
    yt_pred = softmax(logits)

    return a_next, yt_pred, (a_next, a_prev, xt, params)

In [6]:
# Smoke test for rnn_cell_forward on random data.
# The seed is fixed and every array below is drawn from the same global
# NumPy random stream, so the order of these randn calls determines the values.
np.random.seed(1)
xt = np.random.randn(3, 10)      # input for one time step, shape (3, 10)
a_prev = np.random.randn(5, 10)  # previous hidden state, shape (5, 10)
Waa = np.random.randn(5, 5)      # multiplies a_prev
Wax = np.random.randn(5, 3)      # multiplies xt
Wya = np.random.randn(2, 5)      # multiplies the new hidden state
ba = np.random.randn(5, 1)       # hidden bias; the single column broadcasts over axis 1
by = np.random.randn(2, 1)       # output bias; the single column broadcasts over axis 1
params = {"Waa":Waa, "Wax":Wax, "Wya":Wya, "ba":ba, "by":by}

a_next, yt_pred, cache = rnn_cell_forward(xt, a_prev, params)

In [7]:
# Show row 4 of the hidden state, row 1 of the prediction, and both shapes.
a_next[4], a_next.shape, yt_pred[1], yt_pred.shape

(array([ 0.59584544,  0.18141802,  0.61311866,  0.99808218,  0.85016201,
         0.99980978, -0.18887155,  0.99815551,  0.6531151 ,  0.82872037]),
 (5, 10),
 array([0.9888161 , 0.01682021, 0.21140899, 0.36817467, 0.98988387,
        0.88945212, 0.36920224, 0.9966312 , 0.9982559 , 0.17746526]),
 (2, 10))

In [None]:
de rnn_forward(x, a0, params):
    caches = []
    n_x, m, T_x = x.shape
    n_y, n_a = params["Wya"].shape
    
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))