In [1]:
import numpy as np

In [3]:
def softmax(x):
    return np.exp(x - np.max(x))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def init_adam(params):
    L = len(params)
    x = {}
    s = {}
    
    for l in range(L):
        v["dW" + str(l+1)] = np.zeros(params["W" + str(l+1)].shape)
        v["db" + str(l+1)] = np.zeros(params["b" + str(l+1)].shape)        
        v["dW" + str(l+1)] = np.zeros(params["W" + str(l+1)].shape)        
        v["db" + str(l+1)] = np.zeros(params["b" + str(l+1)].shape)        
    return v, s

In [4]:
def update_params_with_adam(params, grads, v, s, t, lr=0.01,
                            beta1=0.9, beta2=0.999, epsilon=1e-8):
    L = len(params) // 2
    v_corrected = {}
    s_corrected = {}
    
    for l in range(L):
        v["dW" + str(l+1)] = beta1 * v["dW" + str(l+1)] + (1-beta1) * grads["dW" + str(l+1)]
        v["db" + str(l+1)] = beta1 * v["db" + str(l+1)] + (1-beta1) * grads["db" + str(l+1)]
        
        v_corrected["dW" + str(l+1)] = v["dW" + str(l+1)] / (1 - beta1**t)
        v_corrected["db" + str(l+1)] = v["db" + str(l+1)] / (1 - beta1**t)
        
        s["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] + (1-beta2) * (grads["dW" + str(l+1)]**2)
        s["db" + str(l+1)] = beta2 * s["db" + str(l+1)] + (1-beta2) * (grads["db" + str(l+1)]**2)
                
        s_corrected["dW" + str(l+1)] = beta2 * s["dW" + str(l+1)] / (1 - beta2 ** t)
        s_corrected["db" + str(l+1)] = beta2 * s["db" + str(l+1)] / (1 - beta2 ** t)
    
        params["W" + str(l+1)] = (params["W" + str(l+1)] - lr *
                                  v_corrected["dW" + str(l+1)] /
                                  np.sqrt(s_corrected["dW" + str(l+1)] + epsilon))
        params["W" + str(l+1)] = (params["b" + str(l+1)] - lr *
                                  v_corrected["db" + str(l+1)] /
                                  np.sqrt(s_corrected["db" + str(l+1)] + epsilon))
        
        return params, v, s

In [5]:
def rnn_cell_forward(xt, a_prev, params):
    Wax = params["Wax"]
    Waa = params["Waa"]
    Wya = params["Wya"]
    ba = params["ba"]
    by = params["by"]
    
    a_next = np.tanh(np.dot(Waa, a_prev) + np.dot(Wax, xt) + ba)
    
    yt_pred = softmax(np.dot(Wya, a_next) + by)
    
    cache = (a_next, a_prev, xt, params)
    
    return a_next, yt_pred, cache

In [6]:
xt_tmp = np.random.randn(3,10)
a_prev_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Waa'] = np.random.randn(5,5)
parameters_tmp['Wax'] = np.random.randn(5,3)
parameters_tmp['Wya'] = np.random.randn(2,5)
parameters_tmp['ba'] = np.random.randn(5,1)
parameters_tmp['by'] = np.random.randn(2,1)

a_next_tmp, yt_pred_tmp, cache_tmp = rnn_cell_forward(xt_tmp, a_prev_tmp, parameters_tmp)
print("a_next[4] = \n", a_next_tmp[4])
print("a_next.shape = \n", a_next_tmp.shape)
print("yt_pred[1] =\n", yt_pred_tmp[1])
print("yt_pred.shape = \n", yt_pred_tmp.shape)

a_next[4] = 
 [ 0.59584544  0.18141802  0.61311866  0.99808218  0.85016201  0.99980978
 -0.18887155  0.99815551  0.6531151   0.82872037]
a_next.shape = 
 (5, 10)
yt_pred[1] =
 [0.40697392 0.00135672 0.00439143 0.0052786  0.09032764 0.01886558
 0.02257065 0.48187669 1.         0.01624725]
yt_pred.shape = 
 (2, 10)


In [9]:
def rnn_forward(x, a0, params):
    caches = []
    
    n_x, m, T_x = x.shape
    n_y, n_a = params["Wya"].shape
    
    a = np.zeros((n_a, m, T_x))
    y_pred = np.zeros((n_y, m, T_x))
    
    a_next = a0
    
    for t in range(T_x):
        xt = x[:, :, t]
        a_next, yt_pred, cache = rnn_cell_forward(xt, a_next, params)
        a[:, :, t] = a_next
        y_pred[:, :, t] = yt_pred
        caches.append(cache)
    caches = (caches, x)
    
    return a, y_pred, caches

In [10]:
x_tmp = np.random.randn(3,10,4)
a0_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Waa'] = np.random.randn(5,5)
parameters_tmp['Wax'] = np.random.randn(5,3)
parameters_tmp['Wya'] = np.random.randn(2,5)
parameters_tmp['ba'] = np.random.randn(5,1)
parameters_tmp['by'] = np.random.randn(2,1)

a_tmp, y_pred_tmp, caches_tmp = rnn_forward(x_tmp, a0_tmp, parameters_tmp)
print("a[4][1] = \n", a_tmp[4][1])
print("a.shape = \n", a_tmp.shape)
print("y_pred[1][3] =\n", y_pred_tmp[1][3])
print("y_pred.shape = \n", y_pred_tmp.shape)
print("caches[1][1][3] =\n", caches_tmp[1][1][3])
print("len(caches) = \n", len(caches_tmp))

a[4][1] = 
 [-0.99999375  0.77911235 -0.99861469 -0.99833267]
a.shape = 
 (5, 10, 4)
y_pred[1][3] =
 [0.03105508 0.13235172 0.00596909 0.08977611]
y_pred.shape = 
 (2, 10, 4)
caches[1][1][3] =
 [-1.1425182  -0.34934272 -0.20889423  0.58662319]
len(caches) = 
 2


In [19]:
def lstm_cell_forward(xt, a_prev, c_prev, params):
    Wf = params["Wf"]
    bf = params["bf"]
    Wi = params["Wi"]
    bi = params["bi"]
    Wc = params["Wc"]
    bc = params["bc"]
    Wo = params["Wo"]
    bo = params["bo"]
    Wy = params["Wy"]
    by = params["by"]
    
    n_x, m = xt.shape
    n_y, n_a = Wy.shape
    
    concat = np.concatenate((a_prev, xt), axis=0)
    
    ft = sigmoid(np.dot(Wf, concat) + bf)
    it = sigmoid(np.dot(Wi, concat) + bi)    
    cct = sigmoid(np.dot(Wc, concat) + bc)    
    ot = sigmoid(np.dot(Wo, concat) + bo)
    c_next = ft * c_prev + it * cct
    a_next = ot * np.tanh(c_next)
    
    yt_pred = softmax(np.dot(Wy, a_next) + by)
    
    cache = (a_next, c_next, a_prev, c_prev, ft, it, cct, ot, xt, params)
    
    return a_next, c_next, yt_pred, cache

In [20]:
xt_tmp = np.random.randn(3,10)
a_prev_tmp = np.random.randn(5,10)
c_prev_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Wf'] = np.random.randn(5, 5+3)
parameters_tmp['bf'] = np.random.randn(5,1)
parameters_tmp['Wi'] = np.random.randn(5, 5+3)
parameters_tmp['bi'] = np.random.randn(5,1)
parameters_tmp['Wo'] = np.random.randn(5, 5+3)
parameters_tmp['bo'] = np.random.randn(5,1)
parameters_tmp['Wc'] = np.random.randn(5, 5+3)
parameters_tmp['bc'] = np.random.randn(5,1)
parameters_tmp['Wy'] = np.random.randn(2,5)
parameters_tmp['by'] = np.random.randn(2,1)

a_next_tmp, c_next_tmp, yt_tmp, cache_tmp = lstm_cell_forward(xt_tmp, a_prev_tmp, c_prev_tmp, parameters_tmp)
print("a_next[4] = \n", a_next_tmp[4])
print("a_next.shape = ", a_next_tmp.shape)
print("c_next[2] = \n", c_next_tmp[2])
print("c_next.shape = ", c_next_tmp.shape)
print("yt[1] =", yt_tmp[1])
print("yt.shape = ", yt_tmp.shape)
print("cache[1][3] =\n", cache_tmp[1][3])
print("len(cache) = ", len(cache_tmp))

a_next[4] = 
 [ 0.04176323  0.00367307  0.02758262  0.24039567 -0.80830595  0.00138345
  0.83514496  0.33095507 -0.00216128  0.57157844]
a_next.shape =  (5, 10)
c_next[2] = 
 [ 0.60075867  1.22816096  0.40413742  0.73488957 -0.92348061  0.38588177
  0.81438313 -0.17660798  0.04070506  0.24633092]
c_next.shape =  (5, 10)
yt[1] = [0.02420857 0.0128545  0.01215836 0.0122021  0.04586508 0.0168491
 0.00790704 0.02483387 0.0238743  0.01023779]
yt.shape =  (2, 10)
cache[1][3] =
 [-0.15763477  0.99633321  0.6900409  -0.02216185  0.03037478 -0.37931179
  0.07209541 -0.7651992   1.33176062  0.49481862]
len(cache) =  10


In [25]:
def lstm_forward(x, a0, params):
    caches = []
    Wy = params["Wy"]
    n_x, m, T_x = x.shape
    n_y, n_a = Wy.shape
    
    a = np.zeros((n_a, m, T_x))
    c = np.zeros((n_a, m, T_x))    
    y = np.zeros((n_y, m, T_x))   
    
    a_next = a0
    c_next = np.zeros(a0.shape)
    
    for t in range(T_x):
        xt = x[:, :, t]
        a_next, c_next, yt, cache = lstm_cell_forward(xt, a_next, c_next, params)
        a[:, :, t] = a_next
        c[:, :, t] = c_next
        y[:, :, t] = yt
        caches.append(cache)
        
    caches = (caches, x)
    
    return a, y, c, caches

In [26]:
np.random.seed(1)
x_tmp = np.random.randn(3,10,7)
a0_tmp = np.random.randn(5,10)
parameters_tmp = {}
parameters_tmp['Wf'] = np.random.randn(5, 5+3)
parameters_tmp['bf'] = np.random.randn(5,1)
parameters_tmp['Wi'] = np.random.randn(5, 5+3)
parameters_tmp['bi']= np.random.randn(5,1)
parameters_tmp['Wo'] = np.random.randn(5, 5+3)
parameters_tmp['bo'] = np.random.randn(5,1)
parameters_tmp['Wc'] = np.random.randn(5, 5+3)
parameters_tmp['bc'] = np.random.randn(5,1)
parameters_tmp['Wy'] = np.random.randn(2,5)
parameters_tmp['by'] = np.random.randn(2,1)

a_tmp, y_tmp, c_tmp, caches_tmp = lstm_forward(x_tmp, a0_tmp, parameters_tmp)
print("a[4][3][6] = ", a_tmp[4][3][6])
print("a.shape = ", a_tmp.shape)
print("y[1][4][3] =", y_tmp[1][4][3])
print("y.shape = ", y_tmp.shape)
print("caches[1][1][1] =\n", caches_tmp[1][1][1])
print("c[1][2][1]", c_tmp[1][2][1])
print("len(caches) = ", len(caches_tmp))

TypeError: lstm_cell_forward() takes 3 positional arguments but 4 were given