<a href="https://colab.research.google.com/github/vsingh9076/Natural_Language_Processing/blob/master/RNN/RNN_Scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import numpy as np

class SimpleCharRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights and biases
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01  # Input to hidden
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # Hidden to hidden
        self.Why = np.random.randn(output_size, hidden_size) * 0.01  # Hidden to output
        self.bh = np.zeros((hidden_size, 1))  # Hidden bias
        self.by = np.zeros((output_size, 1))  # Output bias

        # Vocabulary
        self.char_to_ix = {}
        self.ix_to_char = {}

    def build_vocab(self, text):
        chars = sorted(set(text))
        self.char_to_ix = {ch: i for i, ch in enumerate(chars)}
        self.ix_to_char = {i: ch for i, ch in enumerate(chars)}

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, inputs, h_prev):
        # Update hidden state
        self.h_next = np.tanh(np.dot(self.Wxh, inputs) + np.dot(self.Whh, h_prev) + self.bh)

        # Compute output
        output = np.dot(self.Why, self.h_next) + self.by
        output_probs = self.sigmoid(output)

        return output_probs, self.h_next

    def backward(self, inputs, targets, h_prev, output_probs, learning_rate=0.01):
        # Compute gradients
        dWhy = np.dot((output_probs - targets), self.h_next.T)
        dby = output_probs - targets
        dh_next = np.dot(self.Why.T, dby) + np.dot(self.Whh.T, (1 - self.h_next**2) * h_prev)
        dWhh = np.dot((1 - self.h_next**2) * h_prev, h_prev.T)
        dWxh = np.dot((1 - self.h_next**2) * h_prev, inputs.T)
        dbh = (1 - self.h_next**2) * h_prev

        # Update parameters
        self.Why -= learning_rate * dWhy
        self.by -= learning_rate * dby
        self.Whh -= learning_rate * dWhh
        self.Wxh -= learning_rate * dWxh
        self.bh -= learning_rate * dbh

    def train(self, text, learning_rate=0.01, epochs=1000):
        self.build_vocab(text)
        self.output_size = len(self.char_to_ix)

        for epoch in range(epochs):
            h_prev = np.zeros((self.hidden_size, 1))  # Initial hidden state
            loss = 0

            for t in range(len(text)-1):
                # Forward pass
                x = np.zeros((self.input_size, 1))
                x[self.char_to_ix[text[t]]] = 1  # One-hot encoding for the character
                y, h_prev = self.forward(x, h_prev)

                # Compute loss
                target = np.zeros((self.output_size, 1))
                target[self.char_to_ix[text[t+1]]] = 1  # One-hot encoding for the next character
                loss += -np.sum(target * np.log(y))

                # Backpropagation
                self.backward(x, target, h_prev, y, learning_rate)

            # Print loss for monitoring training progress
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

# Example usage
text_data = "hello world"
input_size = len(set(text_data))
hidden_size = 10
output_size = len(set(text_data))

char_rnn = SimpleCharRNN(input_size, hidden_size, output_size)
char_rnn.train(text_data)


Epoch 0, Loss: 7.023956792935076
Epoch 100, Loss: 15.45552191295862
Epoch 200, Loss: 17.414384196189374
Epoch 300, Loss: 18.020117181435406
Epoch 400, Loss: 18.238020568687755
Epoch 500, Loss: 18.321933207395478
Epoch 600, Loss: 18.355230814747525
Epoch 700, Loss: 18.36861875390664
Epoch 800, Loss: 18.374033122327575
Epoch 900, Loss: 18.37622856330542


In [9]:
import numpy as np

class SimpleCharRNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights and biases
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01  # Input to hidden
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01  # Hidden to hidden
        self.Why = np.random.randn(output_size, hidden_size) * 0.01  # Hidden to output
        self.bh = np.zeros((hidden_size, 1))  # Hidden bias
        self.by = np.zeros((output_size, 1))  # Output bias

        # Vocabulary
        self.char_to_ix = {}
        self.ix_to_char = {}

    def build_vocab(self, text):
        chars = sorted(set(text))
        self.char_to_ix = {ch: i for i, ch in enumerate(chars)}
        self.ix_to_char = {i: ch for i, ch in enumerate(chars)}

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def forward(self, inputs, h_prev):
        # Update hidden state
        self.h_next = np.tanh(np.dot(self.Wxh, inputs) + np.dot(self.Whh, h_prev) + self.bh)

        # Compute output
        output = np.dot(self.Why, self.h_next) + self.by
        output_probs = self.sigmoid(output)

        return output_probs, self.h_next

    def backward(self, inputs, targets, h_prev, output_probs, learning_rate=0.01):
        # Compute gradients
        dWhy = np.dot((output_probs - targets), self.h_next.T)
        dby = output_probs - targets
        dh_next = np.dot(self.Why.T, dby) + np.dot(self.Whh.T, (1 - self.h_next**2) * h_prev)
        dWhh = np.dot((1 - self.h_next**2) * h_prev, h_prev.T)
        dWxh = np.dot((1 - self.h_next**2) * h_prev, inputs.T)
        dbh = (1 - self.h_next**2) * h_prev

        # Update parameters
        self.Why -= learning_rate * dWhy
        self.by -= learning_rate * dby
        self.Whh -= learning_rate * dWhh
        self.Wxh -= learning_rate * dWxh
        self.bh -= learning_rate * dbh

    def train(self, text, learning_rate=0.01, epochs=1000):
        self.build_vocab(text)
        self.output_size = len(self.char_to_ix)

        for epoch in range(epochs):
            h_prev = np.zeros((self.hidden_size, 1))  # Initial hidden state
            loss = 0

            for t in range(len(text)-1):
                # Forward pass
                x = np.zeros((self.input_size, 1))
                x[self.char_to_ix[text[t]]] = 1  # One-hot encoding for the character
                y, h_prev = self.forward(x, h_prev)

                # Compute loss
                target = np.zeros((self.output_size, 1))
                target[self.char_to_ix[text[t+1]]] = 1  # One-hot encoding for the next character
                loss += -np.sum(target * np.log(y))

                # Backpropagation
                self.backward(x, target, h_prev, y, learning_rate)

            # Print loss for monitoring training progress
            if epoch % 100 == 0:
                print(f'Epoch {epoch}, Loss: {loss}')

# Example usage
text_data = "hello world"
input_size = len(set(text_data))
hidden_size = 10
output_size = len(set(text_data))

char_rnn = SimpleCharRNN(input_size, hidden_size, output_size)
char_rnn.train(text_data)


Epoch 0, Loss: 7.023458664808913
Epoch 100, Loss: 15.45564375087393
Epoch 200, Loss: 17.41438094076354
Epoch 300, Loss: 18.020105598891824
Epoch 400, Loss: 18.23801442421987
Epoch 500, Loss: 18.321930607379507
Epoch 600, Loss: 18.355229797461504
Epoch 700, Loss: 18.36861836939374
Epoch 800, Loss: 18.374032979475018
Epoch 900, Loss: 18.37622851073108
