<a href="https://colab.research.google.com/github/thanhnguyen2612/diveintocode-ml/blob/master/ML_sprint22_RNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Recurrent Neural Network

In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

%matplotlib inline

# [Problem 1] Forward propagation implementation of SimpleRNN

In [30]:
# Simple Recurrent layer
class SimpleRNN:
    """Forward-only simple recurrent layer.

    For every time step ``t`` the hidden state is updated as
    ``h = activation.forward(X[:, t, :] @ W_x + h @ W_h + b)``, starting
    from an all-zero state and using a fixed bias vector of ones.

    Parameters
    ----------
    batch_size : int
        Nominal number of samples per batch. Kept for reference; the hidden
        state itself is sized from the input actually given to ``forward``.
    n_sequences : int
        Number of time steps consumed from the input.
    n_features : int
        Size passed to ``initializer.W_x`` as the first dimension.
    n_nodes : int
        Number of hidden units.
    initializer : object
        Must provide ``W_x(n_features, n_nodes)`` and ``W_h(n_nodes, n_nodes)``.
    activation : object
        Must provide ``forward(array)``.
    """

    def __init__(self, batch_size, n_sequences, n_features,
                 n_nodes, initializer, activation):
        self.batch_size = batch_size
        self.n_sequences = n_sequences
        self.n_features = n_features
        self.n_nodes = n_nodes
        self.initializer = initializer
        self.activation = activation
        self.__init_weights()

    def __init_weights(self):
        """Ask the initializer for the input and recurrent weight matrices."""
        self.W_x = self.initializer.W_x(self.n_features, self.n_nodes)
        self.W_h = self.initializer.W_h(self.n_nodes, self.n_nodes)

    def forward(self, X):
        """Run the recurrence over the sequence and return the last hidden state.

        Parameters
        ----------
        X : ndarray
            Indexed as ``X[:, t, :]``, i.e. (batch, time step, feature).

        Returns
        -------
        ndarray of shape (X.shape[0], n_nodes)
            Hidden state after ``n_sequences`` steps.
        """
        # Size the state from the real input so a batch that differs from the
        # constructor's ``batch_size`` neither fails nor gets silently
        # broadcast to the wrong number of rows.
        h = np.zeros((np.shape(X)[0], self.n_nodes))
        bias = np.ones((self.n_nodes,))
        for t in range(self.n_sequences):
            h = self.activation.forward(X[:, t, :] @ self.W_x + h @ self.W_h + bias)
        return h

    def backward(self, d):
        """Placeholder: backpropagation is not implemented for this layer.

        Returns ``None`` without touching any state.
        """
        pass

# [Problem 2] Forward propagation experiment with a small sequence

In [31]:
# Hand-checkable toy problem: one sample, two time steps, two features,
# four hidden nodes. All values are scaled down by 100.
x = np.array([[[1, 2], [3, 4]]]) / 100
w_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100
w_h = np.array([
    [1, 3, 5, 7],
    [2, 4, 6, 8],
    [3, 5, 7, 8],
    [4, 6, 8, 10],
]) / 100

# Dimensions are read straight off the fixture arrays.
batch_size, n_sequences, n_features = x.shape
n_nodes = w_x.shape[-1]

# Zero initial hidden state and an all-ones bias.
h = np.zeros((batch_size, n_nodes))
b = np.array([1, 1, 1, 1])

In [34]:
class DefaultInitializer:
    """Initializer that returns the notebook's fixed toy parameters.

    Each method ignores its arguments and hands back the corresponding
    module-level array (``w_x``, ``w_h``, ``b``), so a layer built with it
    reproduces the hand-computed example instead of using random weights.
    """

    def W_x(self, *args):
        """Return the fixed input-to-hidden weight matrix ``w_x``."""
        return w_x

    def W_h(self, *args):
        """Return the fixed hidden-to-hidden weight matrix ``w_h``."""
        return w_h

    def B(self, *args):
        """Return the fixed bias vector ``b``."""
        return b

class Tanh:
    """Hyperbolic-tangent activation exposing its value and its derivative."""

    def __init__(self):
        self.name = "tanh"

    def forward(self, X):
        """Return ``tanh(X)`` element-wise."""
        return np.tanh(X)

    def backward(self, X):
        """Return the derivative of tanh evaluated at ``X``: ``1 - tanh(X)**2``."""
        activated = self.forward(X)
        return 1 - activated ** 2

# Build the forward-only layer from the toy dimensions, using the fixed
# weights from DefaultInitializer and a tanh activation.
rnn = SimpleRNN(
    batch_size=batch_size,
    n_sequences=n_sequences,
    n_features=n_features,
    n_nodes=n_nodes,
    initializer=DefaultInitializer(),
    activation=Tanh(),
)

In [35]:
# Run the recurrence on the toy input; the value shown is the hidden state
# returned after the last time step.
rnn.forward(x)

array([[0.79235792, 0.81445291, 0.83441399, 0.85019348]])

# [Problem 3] (Advanced assignment) Implementation of backpropagation

In [41]:
# Other necessary functions
class SimpleInitializer:
    """Gaussian parameter initializer.

    Values are drawn with ``np.random.randn`` and multiplied by ``sigma``.
    """

    def __init__(self, sigma):
        self.sigma = sigma

    def W(self, *args):
        """Return scaled normal samples with shape ``args``."""
        return np.random.randn(*args) * self.sigma

    def B(self, *args):
        """Return scaled normal samples with shape ``(1, *args)``."""
        return np.random.randn(1, *args) * self.sigma

class Tanh:
    """Tanh activation that remembers its last input for the backward pass."""

    def forward(self, A):
        """Store ``A`` on the instance and return ``tanh(A)``."""
        self.A = A
        return np.tanh(A)

    def backward(self, dZ):
        """Scale the upstream gradient ``dZ`` by tanh's slope at the stored input."""
        local_slope = 1 - np.tanh(self.A) ** 2
        return dZ * local_slope

In [46]:
class ScratchSimpleRNNClassifier:
    """Simple recurrent layer with forward pass and backpropagation through time.

    Recurrence, for each time step ``t``::

        a_t = X[:, t, :] @ W_x + h_{t-1} @ W_h + B
        h_t = activator.forward(a_t)

    with ``h_0`` equal to zero. ``forward`` returns the last hidden state;
    ``backward`` takes the gradient with respect to that state.

    Parameters
    ----------
    batch_size : int
        Nominal number of samples per batch. Kept for reference; the hidden
        state is sized from the input actually given to ``forward``.
    n_sequences : int
        Number of time steps consumed from the input.
    n_features : int
        First dimension requested for ``W_x``.
    n_nodes : int
        Number of hidden units.
    initializer : object
        Must provide ``W(rows, cols)`` and ``B(n_nodes)``.
    activator : object
        Must provide ``forward(a)`` that caches what it needs and
        ``backward(grad)`` that uses that cache (as the notebook's ``Tanh``
        does).

    Attributes set by ``backward``
    ------------------------------
    dW_x, dW_h, dB : ndarray
        Gradients with the same shapes as ``W_x``, ``W_h`` and ``B``.
    """

    def __init__(self, batch_size, n_sequences, n_features,
                 n_nodes, initializer, activator):
        self.batch_size = batch_size
        self.n_sequences = n_sequences
        self.n_features = n_features
        self.n_nodes = n_nodes
        self.initializer = initializer
        self.activator = activator
        self.h_hist = []  # hidden state after each step of the last forward pass
        self.a_hist = []  # pre-activation of each step of the last forward pass
        self.__init_weights()

    def __init_weights(self):
        """Ask the initializer for the weight matrices and the bias."""
        self.W_x = self.initializer.W(self.n_features, self.n_nodes)
        self.W_h = self.initializer.W(self.n_nodes, self.n_nodes)
        self.B = self.initializer.B(self.n_nodes)

    def forward(self, X):
        """Run the recurrence and return the hidden state of the last step.

        Parameters
        ----------
        X : ndarray
            Indexed as ``X[:, t, :]``, i.e. (batch, time step, feature).

        Returns
        -------
        ndarray of shape (X.shape[0], n_nodes)
        """
        self.X = X.copy()
        # Start each pass with fresh histories; otherwise repeated calls keep
        # appending and backward would pair gradients with stale states.
        self.h_hist = []
        self.a_hist = []
        h = np.zeros((self.X.shape[0], self.n_nodes))
        for t in range(self.n_sequences):
            a = self.X[:, t, :] @ self.W_x + h @ self.W_h + self.B
            h = self.activator.forward(a)
            self.a_hist.append(a)
            self.h_hist.append(h)
        return h

    def backward(self, dH):
        """Backpropagate through time from the gradient of the last hidden state.

        Parameters
        ----------
        dH : ndarray of shape (batch, n_nodes)
            Gradient of the loss with respect to the value returned by the
            most recent ``forward`` call.

        Returns
        -------
        dX : ndarray, same shape as the input of ``forward``
            Gradient with respect to the input sequence.
        dH : ndarray of shape (batch, n_nodes)
            Gradient with respect to the initial (zero) hidden state.

        Raises
        ------
        RuntimeError
            If ``forward`` has not been called yet.

        Notes
        -----
        Stores ``dW_x``, ``dW_h`` and ``dB`` on the instance. The activator's
        cache is overwritten while stepping back through the sequence.
        """
        if not self.a_hist:
            raise RuntimeError("forward() must be called before backward()")

        self.dW_x = np.zeros_like(self.W_x, dtype=float)
        self.dW_h = np.zeros_like(self.W_h, dtype=float)
        self.dB = np.zeros_like(self.B, dtype=float)
        dX = np.zeros(self.X.shape, dtype=float)
        h_initial = np.zeros_like(self.h_hist[0])

        for t in reversed(range(len(self.a_hist))):
            # The activator only remembers its most recent input, so replay
            # step t to make its cache match before asking for the gradient.
            self.activator.forward(self.a_hist[t])
            dA = self.activator.backward(dH)

            h_prev = self.h_hist[t - 1] if t > 0 else h_initial
            self.dB += np.sum(dA, axis=0)
            self.dW_x += self.X[:, t, :].T @ dA
            self.dW_h += h_prev.T @ dA

            dX[:, t, :] = dA @ self.W_x.T
            # Gradient flowing into the previous hidden state.
            dH = dA @ self.W_h.T

        return dX, dH

In [54]:
# Build the classifier layer with small random Gaussian parameters
# (sigma = 0.01) and run one forward pass on the toy input.
rnn = ScratchSimpleRNNClassifier(
    batch_size=batch_size,
    n_sequences=n_sequences,
    n_features=n_features,
    n_nodes=n_nodes,
    initializer=SimpleInitializer(0.01),
    activator=Tanh(),
)

rnn.forward(x)

array([[ 0.00452989, -0.00212587, -0.00248087, -0.01694569]])

In [55]:
# Sanity check: replace the random parameters with the fixed toy weights and
# an all-ones bias, then rerun the forward pass on the toy input.

rnn.W_x = np.array([[1, 3, 5, 7], [3, 5, 7, 8]]) / 100
rnn.W_h = np.array([
    [1, 3, 5, 7],
    [2, 4, 6, 8],
    [3, 5, 7, 8],
    [4, 6, 8, 10],
]) / 100
rnn.B = np.array([1, 1, 1, 1])
rnn.forward(x)

array([[0.79235792, 0.81445291, 0.83441399, 0.85019348]])