In [None]:
import numpy as np

class SimpleRNN:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate

        # Weight initialization
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01  # Input to hidden
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01 # Hidden to hidden
        self.Why = np.random.randn(output_size, hidden_size) * 0.01 # Hidden to output
        self.bh = np.zeros((hidden_size, 1))  # Hidden bias
        self.by = np.zeros((output_size, 1))  # Output bias

    def forward(self, inputs):
        h_prev = np.zeros((self.hidden_size, 1))
        hs = {}
        ys = {}

        for t in range(len(inputs)):
            x = np.reshape(inputs[t], (-1, 1))
            h_prev = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h_prev) + self.bh)# activation functions

            y = np.dot(self.Why, h_prev) + self.by
            hs[t] = h_prev
            ys[t] = y

        return ys, hs

    def backward(self, inputs, targets, hs, ys):
        dWxh, dWhh, dWhy = np.zeros_like(self.Wxh), np.zeros_like(self.Whh), np.zeros_like(self.Why)
        dbh, dby = np.zeros_like(self.bh), np.zeros_like(self.by)
        dh_next = np.zeros((self.hidden_size, 1))

        for t in reversed(range(len(inputs))):
            dy = np.copy(ys[t])
            dy -= targets[t].reshape(-1, 1)

            dWhy += np.dot(dy, hs[t].T)
            dby += dy

            dh = np.dot(self.Why.T, dy) + dh_next
            dh_raw = (1 - hs[t] * hs[t]) * dh
            dbh += dh_raw
            dWxh += np.dot(dh_raw, inputs[t].reshape(1, -1))

            # Only update dWhh if t > 0 to avoid KeyError
            if t > 0:
                dWhh += np.dot(dh_raw, hs[t-1].T)

            dh_next = np.dot(self.Whh.T, dh_raw)

        for dparam in [dWxh, dWhh, dWhy, dbh, dby]:
            np.clip(dparam, -1, 1, out=dparam)

        self.Wxh -= self.learning_rate * dWxh
        self.Whh -= self.learning_rate * dWhh
        self.Why -= self.learning_rate * dWhy
        self.bh -= self.learning_rate * dbh
        self.by -= self.learning_rate * dby

    def train(self, inputs, targets, epochs=10):
        for epoch in range(epochs):
            ys, hs = self.forward(inputs)
            self.backward(inputs, targets, hs, ys)
            if epoch % 100 == 0:
                loss = np.sum((targets - np.array(list(ys.values())).squeeze()) ** 2)
                print(f'Epoch {epoch}, Loss: {loss}')
    def predict(self, inputs):
        ys, _ = self.forward(inputs)
        return list(ys.values())[-1]

# Example usage
inputs = np.array([30, 32, 34, 33, 31, 30, 29])  # Temperature for a week
targets = np.array([32, 34, 33, 31, 30, 29, 28])  # Next day temperature

rnn = SimpleRNN(input_size=1, hidden_size=10, output_size=1)
rnn.train(inputs, targets,epochs=1000)

Epoch 0, Loss: 6756.682259943567
Epoch 100, Loss: 2932.8348424316864
Epoch 200, Loss: 642.7041246556953
Epoch 300, Loss: 27.99982576879568
Epoch 400, Loss: 27.99981961081971
Epoch 500, Loss: 27.99981299087168
Epoch 600, Loss: 27.999805853769356
Epoch 700, Loss: 27.999798134992975
Epoch 800, Loss: 27.999789758579674
Epoch 900, Loss: 27.999780634407628


In [None]:
test_input = np.array([31, 30, 29, 28, 27, 26, 25])  # Last week
prediction = rnn.predict(test_input)
print(f'Predicted temperature: {prediction}')

Predicted temperature: [[30.99970323]]


# Long Short term memory neural networking implementation


In [None]:
import numpy as np

# Sample weather data (7 days of temperature data)
data = np.array([
    [20, 22, 23, 21, 20, 22, 24],  # Week 1
    [22, 23, 24, 22, 21, 23, 25],  # Week 2
    [21, 22, 23, 22, 21, 22, 24],  # Week 3
])

# Calculate mean and std for later use
data_mean = np.mean(data)
data_std = np.std(data)

# Normalize the data
data_normalized = (data - data_mean) / data_std

# Prepare input (X) and target (y) data
X = data_normalized[:, :-1]  # First 6 days of each week
y = data_normalized[:, -1]   # Last day of each week

# Hyperparameters
input_size = 1
hidden_size = 4
output_size = 1
learning_rate = 0.01
epochs = 2000

# Initialize weights
Wf = np.random.randn(hidden_size, hidden_size + input_size) * 0.01
Wi = np.random.randn(hidden_size, hidden_size + input_size) * 0.01
Wc = np.random.randn(hidden_size, hidden_size + input_size) * 0.01
Wo = np.random.randn(hidden_size, hidden_size + input_size) * 0.01
Wy = np.random.randn(output_size, hidden_size) * 0.01

bf = np.zeros((hidden_size, 1))
bi = np.zeros((hidden_size, 1))
bc = np.zeros((hidden_size, 1))
bo = np.zeros((hidden_size, 1))
by = np.zeros((output_size, 1))

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(y):
    return y * (1 - y)

def tanh(x):
    return np.tanh(x)

def dtanh(y):
    return 1 - y**2

def forward_pass(x, h_prev, c_prev):
    concat = np.vstack((h_prev, x))

    f = sigmoid(np.dot(Wf, concat) + bf)
    i = sigmoid(np.dot(Wi, concat) + bi)
    c_tilde = tanh(np.dot(Wc, concat) + bc)
    o = sigmoid(np.dot(Wo, concat) + bo)

    c = f * c_prev + i * c_tilde
    h = o * tanh(c)

    y = np.dot(Wy, h) + by

    cache = (f, i, c_tilde, o, c, h, y, concat, c_prev)
    return cache

def backward_pass(dy, cache, dc_next, dh_next):
    f, i, c_tilde, o, c, h, y, concat, c_prev = cache

    dy = dy.reshape(-1, 1)

    dh = np.dot(Wy.T, dy) + dh_next
    do = dh * tanh(c)
    dc = dh * o * dtanh(tanh(c)) + dc_next
    dc_tilde = dc * i
    di = dc * c_tilde
    df = dc * c_prev  # Fixed: use c_prev instead of concat[-hidden_size:]

    dWo = np.dot(do * dsigmoid(o), concat.T)
    dWi = np.dot(di * dsigmoid(i), concat.T)
    dWf = np.dot(df * dsigmoid(f), concat.T)
    dWc = np.dot(dc_tilde * dtanh(c_tilde), concat.T)

    dWy = np.dot(dy, h.T)
    dby = dy

    dbo = do * dsigmoid(o)
    dbi = di * dsigmoid(i)
    dbf = df * dsigmoid(f)
    dbc = dc_tilde * dtanh(c_tilde)

    dconcat = (np.dot(Wo.T, do * dsigmoid(o)) +
               np.dot(Wi.T, di * dsigmoid(i)) +
               np.dot(Wf.T, df * dsigmoid(f)) +
               np.dot(Wc.T, dc_tilde * dtanh(c_tilde)))

    dh_prev = dconcat[:hidden_size]
    dc_prev = f * dc

    return dWf, dWi, dWc, dWo, dWy, dbf, dbi, dbc, dbo, dby, dh_prev, dc_prev

# Training loop
for epoch in range(epochs):
    # Shuffle the data
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    X_shuffled = X[indices]
    y_shuffled = y[indices]

    loss = 0
    for j in range(len(X_shuffled)):
        x = X_shuffled[j].reshape(-1, 1)
        target = y_shuffled[j]

        h = np.zeros((hidden_size, 1))
        c = np.zeros((hidden_size, 1))

        caches = []
        for t in range(6):  # 6 time steps (6 days)
            cache = forward_pass(x[t:t+1], h, c)
            caches.append(cache)
            _, _, _, _, c, h, _, _, _ = cache

        y_pred = caches[-1][6]
        loss += (y_pred - target)**2 / 2

        dy = y_pred - target
        dh_next = np.zeros_like(h)
        dc_next = np.zeros_like(c)

        dWf, dWi, dWc, dWo, dWy = np.zeros_like(Wf), np.zeros_like(Wi), np.zeros_like(Wc), np.zeros_like(Wo), np.zeros_like(Wy)
        dbf, dbi, dbc, dbo, dby = np.zeros_like(bf), np.zeros_like(bi), np.zeros_like(bc), np.zeros_like(bo), np.zeros_like(by)

        for t in reversed(range(6)):
            grad = backward_pass(dy, caches[t], dc_next, dh_next)
            dWf_, dWi_, dWc_, dWo_, dWy_, dbf_, dbi_, dbc_, dbo_, dby_, dh_next, dc_next = grad

            dWf += dWf_
            dWi += dWi_
            dWc += dWc_
            dWo += dWo_
            dWy += dWy_
            dbf += dbf_
            dbi += dbi_
            dbc += dbc_
            dbo += dbo_
            dby += dby_

        # Update weights with learning rate decay
        lr = learning_rate / (1 + epoch / 1000)  # Simple learning rate decay
        Wf -= lr * dWf
        Wi -= lr * dWi
        Wc -= lr * dWc
        Wo -= lr * dWo
        Wy -= lr * dWy
        bf -= lr * dbf
        bi -= lr * dbi
        bc -= lr * dbc
        bo -= lr * dbo
        by -= lr * dby

    if epoch % 100 == 0:
        print(f"Epoch {epoch}, Loss: {loss/len(X)}")

# Test the model
test_week = np.array([21, 23, 22, 20, 22, 23]).reshape(1, -1)
test_week_normalized = (test_week - data_mean) / data_std  # Normalize using training data statistics

h = np.zeros((hidden_size, 1))
c = np.zeros((hidden_size, 1))

for t in range(6):
    x = test_week_normalized[0, t].reshape(-1, 1)
    cache = forward_pass(x, h, c)
    _, _, _, _, c, h, _, _, _ = cache

y_pred = cache[6]
predicted_temp = y_pred * data_std + data_mean  # Denormalize using training data statistics
print(f"Predicted temperature for the 7th day: {predicted_temp[0, 0]:.2f}°C")

# Print actual temperatures for comparison
print(f"Actual temperatures: {data[:, -1]}")

Epoch 0, Loss: [[1.21044865]]
Epoch 100, Loss: [[0.06871926]]
Epoch 200, Loss: [[0.06832451]]
Epoch 300, Loss: [[0.06767133]]
Epoch 400, Loss: [[0.06602391]]
Epoch 500, Loss: [[0.06164647]]
Epoch 600, Loss: [[0.0523063]]
Epoch 700, Loss: [[0.03777212]]
Epoch 800, Loss: [[0.02229797]]
Epoch 900, Loss: [[0.01156307]]
Epoch 1000, Loss: [[0.00653585]]
Epoch 1100, Loss: [[0.0044739]]
Epoch 1200, Loss: [[0.00345836]]
Epoch 1300, Loss: [[0.00282572]]
Epoch 1400, Loss: [[0.00238426]]
Epoch 1500, Loss: [[0.0020593]]
Epoch 1600, Loss: [[0.00180792]]
Epoch 1700, Loss: [[0.00160362]]
Epoch 1800, Loss: [[0.00143845]]
Epoch 1900, Loss: [[0.00130179]]
Predicted temperature for the 7th day: 24.59°C
Actual temperatures: [24 25 24]


In [None]:
y_pred = cache[20]
predicted_temp = y_pred * np.std(data) + np.mean(data)

IndexError: tuple index out of range

In [None]:
def predict_next_day(past_6_days, model_data):
    # Normalize the input data
    normalized_input = (past_6_days - np.mean(model_data)) / np.std(model_data)

    h = np.zeros((hidden_size, 1))
    c = np.zeros((hidden_size, 1))

    for t in range(6):
        x = normalized_input[t].reshape(-1, 1)
        cache = forward_pass(x, h, c)
        _, _, _, _, c, h, _, _, _ = cache # Unpack all 9 values returned by forward_pass

    y_pred = cache[6]
    predicted_temp = y_pred * np.std(model_data) + np.mean(model_data)
    return predicted_temp[0, 0]

In [None]:
def temperature_prediction_interface():
    print("Enter the temperatures for the past 6 days:")
    past_6_days = []
    for i in range(6):
        temp = float(input(f"Day {i+1} temperature: "))
        past_6_days.append(temp)

    next_day_temp = predict_next_day(np.array(past_6_days))
    print(f"Predicted temperature for tomorrow: {next_day_temp:.2f}°C")

# Run the interface
temperature_prediction_interface()

Enter the temperatures for the past 6 days:
Day 1 temperature: 40
Day 2 temperature: 39
Day 3 temperature: 38
Day 4 temperature: 41
Day 5 temperature: 40
Day 6 temperature: 39


TypeError: predict_next_day() missing 1 required positional argument: 'model_data'

In [None]:
def temperature_prediction_interface():
    print("Enter the temperatures for the past 6 days:")
    past_6_days = []
    for i in range(6):
        temp = float(input(f"Day {i+1} temperature: "))
        past_6_days.append(temp)

    # Pass the original data to the prediction function
    next_day_temp = predict_next_day(np.array(past_6_days), data)  # 'data' is the original temperature data
    print(f"Predicted temperature for tomorrow: {next_day_temp:.2f}°C")

# Run the interface
temperature_prediction_interface()

Enter the temperatures for the past 6 days:
Day 1 temperature: 25
Day 2 temperature: 24
Day 3 temperature: 27
Day 4 temperature: 26
Day 5 temperature: 22
Day 6 temperature: 26
Predicted temperature for tomorrow: 26.38°C


In [13]:
import numpy as np

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01

        # Initialize biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1))
        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Forward pass
        for t, x in enumerate(inputs):
            h = np.tanh(np.dot(self.Wxh, x) + np.dot(self.Whh, h) + self.bh)
            self.last_hs[t+1] = h

        # Compute output
        y = np.dot(self.Why, h) + self.by
        p = np.exp(y) / np.sum(np.exp(y))

        return p, h

    def backward(self, d_y, learn_rate=2e-3):
        n = len(self.last_inputs)

        # Backprop into Why and by
        d_Why = np.dot(d_y, self.last_hs[n].T)
        d_by = d_y

        # Backprop into Whh, Wxh, and bh
        d_h = np.dot(self.Why.T, d_y)
        d_Whh = np.zeros_like(self.Whh)
        d_Wxh = np.zeros_like(self.Wxh)
        d_bh = np.zeros_like(self.bh)  # Make sure d_bh has the correct shape

        for t in reversed(range(n)):
            temp = (1 - self.last_hs[t+1] ** 2) * d_h
            d_bh += temp  # Now the shapes should match
            d_Whh += np.dot(temp, self.last_hs[t].T)
            d_Wxh += np.dot(temp, self.last_inputs[t].T)
            d_h = np.dot(self.Whh.T, temp)




        # Clip to prevent exploding gradients
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -5, 5, out=d)

        # Update weights and biases
        self.Wxh -= learn_rate * d_Wxh
        self.Whh -= learn_rate * d_Whh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

def predict_next_word(rnn, input_sequence, vocab):
    input_vector = [np.eye(len(vocab))[vocab[word]] for word in input_sequence]
    p, _ = rnn.forward(input_vector)
    next_word_index = np.argmax(p)

    # Convert the predicted index to a word
    vocab_list = list(vocab.keys())
    if 0 <= next_word_index < len(vocab_list):
        return vocab_list[next_word_index]
    else:
        return "Unknown"

def train_rnn(rnn, vocab, sentences, epochs=100):
    for epoch in range(epochs):
        total_loss = 0
        for sentence in sentences:
            inputs = [np.eye(len(vocab))[vocab[word]] for word in sentence[:-1]]
            target = np.zeros((len(vocab), 1))
            target[vocab[sentence[-1]]] = 1

            # Forward pass
            p, _ = rnn.forward(inputs)

            # Compute loss
            loss = -np.sum(target * np.log(p))
            total_loss += loss

            # Backward pass
            d_y = p - target
            rnn.backward(d_y)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss / len(sentences)}")

# Example usage
vocab = {'hello': 0, 'world': 1, 'how': 2, 'are': 3, 'you': 4}
input_size = len(vocab)
hidden_size = 100
output_size = len(vocab)

rnn = RNN(input_size, hidden_size, output_size)

# Training data
sentences = [
    ['hello', 'how', 'are', 'you'],
    ['hello', 'world', 'how', 'are'],
    ['how', 'are', 'you', 'world']
]

# Train the RNN
train_rnn(rnn, vocab, sentences, epochs=100)

# Test the trained RNN
input_sequence = ['hello', 'how', 'are']
next_word = predict_next_word(rnn, input_sequence, vocab)
print(f"Predicted next word: {next_word}")

ValueError: non-broadcastable output operand with shape (100,1) doesn't match the broadcast shape (100,100)

In [14]:
import numpy as np

class RNN:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights
        self.Wxh = np.random.randn(hidden_size, input_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(output_size, hidden_size) * 0.01

        # Initialize biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((output_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1))
        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Forward pass
        for t, x in enumerate(inputs):
            h = np.tanh(np.dot(self.Wxh, x.reshape(-1, 1)) + np.dot(self.Whh, h) + self.bh)
            self.last_hs[t+1] = h

        # Compute output
        y = np.dot(self.Why, h) + self.by
        p = np.exp(y) / np.sum(np.exp(y))

        return p, h

    def backward(self, d_y, learn_rate=2e-3):
        n = len(self.last_inputs)

        # Backprop into Why and by
        d_Why = np.dot(d_y, self.last_hs[n].T)
        d_by = d_y

        # Backprop into Whh, Wxh, and bh
        d_h = np.dot(self.Why.T, d_y)
        d_Whh = np.zeros_like(self.Whh)
        d_Wxh = np.zeros_like(self.Wxh)
        d_bh = np.zeros_like(self.bh)

        for t in reversed(range(n)):
            temp = (1 - self.last_hs[t+1] ** 2) * d_h
            d_bh += temp
            d_Whh += np.dot(temp, self.last_hs[t].T)
            d_Wxh += np.dot(temp, self.last_inputs[t].reshape(1, -1))
            d_h = np.dot(self.Whh.T, temp)

        # Clip to prevent exploding gradients
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -5, 5, out=d)

        # Update weights and biases
        self.Wxh -= learn_rate * d_Wxh
        self.Whh -= learn_rate * d_Whh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

def predict_next_word(rnn, input_sequence, vocab):
    input_vector = [np.eye(len(vocab))[vocab[word]] for word in input_sequence]
    p, _ = rnn.forward(input_vector)
    next_word_index = np.argmax(p)

    # Convert the predicted index to a word
    vocab_list = list(vocab.keys())
    if 0 <= next_word_index < len(vocab_list):
        return vocab_list[next_word_index]
    else:
        return "Unknown"

def train_rnn(rnn, vocab, sentences, epochs=100):
    for epoch in range(epochs):
        total_loss = 0
        for sentence in sentences:
            inputs = [np.eye(len(vocab))[vocab[word]] for word in sentence[:-1]]
            target = np.zeros((len(vocab), 1))
            target[vocab[sentence[-1]]] = 1

            # Forward pass
            p, _ = rnn.forward(inputs)

            # Compute loss
            loss = -np.sum(target * np.log(p + 1e-8))  # Added small epsilon to avoid log(0)
            total_loss += loss

            # Backward pass
            d_y = p - target
            rnn.backward(d_y)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss / len(sentences)}")

# Example usage
vocab = {'hello': 0, 'world': 1, 'how': 2, 'are': 3, 'you': 4}
input_size = len(vocab)
hidden_size = 100
output_size = len(vocab)

rnn = RNN(input_size, hidden_size, output_size)

# Training data
sentences = [
    ['hello', 'how', 'are', 'you'],
    ['hello', 'world', 'how', 'are'],
    ['how', 'are', 'you', 'world']
]

# Train the RNN
train_rnn(rnn, vocab, sentences, epochs=100)

# Test the trained RNN
input_sequence = ['hello', 'how', 'are']
next_word = predict_next_word(rnn, input_sequence, vocab)
print(f"Predicted next word: {next_word}")

Epoch 0, Loss: 1.608919099085386
Epoch 10, Loss: 1.6006489095838894
Epoch 20, Loss: 1.5925687247242786
Epoch 30, Loss: 1.5846725006284839
Epoch 40, Loss: 1.5769543694186414
Epoch 50, Loss: 1.56940863277602
Epoch 60, Loss: 1.5620297557105907
Epoch 70, Loss: 1.5548123605228046
Epoch 80, Loss: 1.5477512209417288
Epoch 90, Loss: 1.5408412564259188
Predicted next word: you


In [17]:
input_sequence = ['hello', 'how', 'world']
next_word = predict_next_word(rnn, input_sequence, vocab)
print(f"Predicted next word: {next_word}")

Predicted next word: are


# RNN for large textual data to predict next  word sequnece

In [22]:
import numpy as np
import re
from collections import defaultdict

class RNN:
    def __init__(self, vocab_size, hidden_size):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

        # Initialize weights
        self.Wxh = np.random.randn(hidden_size, vocab_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(vocab_size, hidden_size) * 0.01

        # Initialize biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((vocab_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1))
        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Forward pass
        for t, x in enumerate(inputs):
            h = np.tanh(np.dot(self.Wxh, x.reshape(-1, 1)) + np.dot(self.Whh, h) + self.bh)
            self.last_hs[t+1] = h

        # Compute output
        y = np.dot(self.Why, h) + self.by
        p = np.exp(y) / np.sum(np.exp(y))

        return p, h

    def backward(self, d_y, learn_rate=1e-3):
        n = len(self.last_inputs)

        # Backprop into Why and by
        d_Why = np.dot(d_y, self.last_hs[n].T)
        d_by = d_y

        # Backprop into Whh, Wxh, and bh
        d_h = np.dot(self.Why.T, d_y)
        d_Whh = np.zeros_like(self.Whh)
        d_Wxh = np.zeros_like(self.Wxh)
        d_bh = np.zeros_like(self.bh)

        for t in reversed(range(n)):
            temp = (1 - self.last_hs[t+1] ** 2) * d_h
            d_bh += temp
            d_Whh += np.dot(temp, self.last_hs[t].T)
            d_Wxh += np.dot(temp, self.last_inputs[t].reshape(1, -1))
            d_h = np.dot(self.Whh.T, temp)

        # Clip to prevent exploding gradients
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -5, 5, out=d)

        # Update weights and biases
        self.Wxh -= learn_rate * d_Wxh
        self.Whh -= learn_rate * d_Whh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

def preprocess_text(text):
    # Convert to lowercase and remove special characters
    text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    words = text.split()
    return words

def build_vocab(words):
    vocab = defaultdict(lambda: len(vocab))
    vocab['<unk>'] = 0  # Unknown token
    for word in words:
        vocab[word]
    return dict(vocab)

def encode_text(text, vocab):
    return [vocab[word] if word in vocab else vocab['<unk>'] for word in text]

def one_hot_encode(indices, vocab_size):
    return [np.eye(vocab_size)[i] for i in indices]

def train_rnn(rnn, vocab, text, seq_length, epochs=100, learn_rate=1e-3):
    encoded_text = encode_text(text, vocab)
    vocab_size = len(vocab)

    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(encoded_text) - seq_length, seq_length):
            inputs = one_hot_encode(encoded_text[i:i+seq_length], vocab_size)
            targets = encoded_text[i+1:i+seq_length+1]

            # Forward pass
            p, _ = rnn.forward(inputs)

            # Compute loss
            loss = -np.sum([np.log(p[t, targets[t]]) for t in range(seq_length)])
            total_loss += loss

            # Backward pass
            d_y = p.copy()
            for t in range(seq_length):
                d_y[targets[t], t] -= 1
            rnn.backward(d_y, learn_rate)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss / (len(encoded_text) // seq_length)}")

def generate_text(rnn, vocab, seed_text, n_generate=100):
    inv_vocab = {v: k for k, v in vocab.items()}
    encoded_seed = encode_text(seed_text.split(), vocab)
    current_sequence = one_hot_encode(encoded_seed, len(vocab))
    generated_text = seed_text.split()

    for _ in range(n_generate):
        p, _ = rnn.forward(current_sequence)
        next_word_idx = np.random.choice(len(vocab), p=p.ravel())
        generated_text.append(inv_vocab[next_word_idx])
        current_sequence = current_sequence[1:] + [np.eye(len(vocab))[next_word_idx]]

    return ' '.join(generated_text)

# Example usage
text = """
The sun rose slowly over the misty hills, painting the sky in hues of pink and gold. Birds chirped their morning songs, welcoming the new day with melodious tunes. In the nearby village, people began to stir, ready to face whatever challenges lay ahead.

Sarah, a young artist, woke early and made her way to her favorite spot by the lake. She set up her easel and began to capture the serene landscape on canvas. The gentle lapping of water against the shore provided a soothing rhythm as she worked.

Meanwhile, in the bustling city miles away, Mark hurried through crowded streets, briefcase in hand. He weaved through the sea of commuters, all racing against time. The aroma of fresh coffee wafted from nearby cafes, tempting many to pause their rush for a moment of indulgence.

In a quiet library, an elderly man named George pored over ancient texts. His wrinkled hands carefully turned fragile pages, each one a window to forgotten worlds. The musty smell of old books filled the air, a scent he had grown to love over decades of study.

As noon approached, children laughed and played in the local park. Their joyful shouts echoed across the green expanse, a stark contrast to the somber atmosphere of the nearby courthouse. There, Laura, a dedicated lawyer, presented her case with passion and conviction, fighting for justice in a world often devoid of it.

As evening fell, families gathered around dinner tables, sharing stories of their day. The clinking of cutlery and warm conversations filled homes with a sense of togetherness. Outside, street lamps flickered to life, casting a soft glow over the quiet neighborhoods.

In a cozy bookshop at the corner of Main Street, Emma arranged a display of new arrivals. She loved introducing readers to new worlds and ideas, each book a portal to adventure or knowledge. The bell above the door chimed as customers wandered in, drawn by the promise of literary treasures.

As night descended, the city transformed. Neon lights buzzed to life, painting the streets in vibrant colors. Music drifted from bars and clubs, where people danced away their worries. In contrast, the countryside lay peaceful under a blanket of stars, crickets providing a gentle nocturnal symphony.

And so the world turned, each moment a story, each person a character in the grand narrative of life. From the highest mountains to the deepest oceans, countless tales unfolded, waiting to be told, heard, and remembered.
"""

# Preprocess the text
words = preprocess_text(text)
vocab = build_vocab(words)
rnn = RNN(len(vocab), hidden_size=100)

# Train the RNN
train_rnn(rnn, vocab, words, seq_length=5, epochs=100, learn_rate=1e-3)

# Generate text
seed_text = "The sun rose"
generated_text = generate_text(rnn, vocab, seed_text, n_generate=20)
print(f"Generated text: {generated_text}")

IndexError: index 2 is out of bounds for axis 1 with size 1

In [23]:
import numpy as np
import re
from collections import defaultdict

class RNN:
    def __init__(self, vocab_size, hidden_size):
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size

        # Initialize weights
        self.Wxh = np.random.randn(hidden_size, vocab_size) * 0.01
        self.Whh = np.random.randn(hidden_size, hidden_size) * 0.01
        self.Why = np.random.randn(vocab_size, hidden_size) * 0.01

        # Initialize biases
        self.bh = np.zeros((hidden_size, 1))
        self.by = np.zeros((vocab_size, 1))

    def forward(self, inputs):
        h = np.zeros((self.hidden_size, 1))
        self.last_inputs = inputs
        self.last_hs = { 0: h }

        # Forward pass
        for t, x in enumerate(inputs):
            h = np.tanh(np.dot(self.Wxh, x.reshape(-1, 1)) + np.dot(self.Whh, h) + self.bh)
            self.last_hs[t+1] = h

        # Compute output
        y = np.dot(self.Why, h) + self.by
        p = np.exp(y) / np.sum(np.exp(y))

        return p, h

    def backward(self, d_y, learn_rate=1e-3):
        n = len(self.last_inputs)

        # Backprop into Why and by
        d_Why = np.dot(d_y, self.last_hs[n].T)
        d_by = d_y

        # Backprop into Whh, Wxh, and bh
        d_h = np.dot(self.Why.T, d_y)
        d_Whh = np.zeros_like(self.Whh)
        d_Wxh = np.zeros_like(self.Wxh)
        d_bh = np.zeros_like(self.bh)

        for t in reversed(range(n)):
            temp = (1 - self.last_hs[t+1] ** 2) * d_h
            d_bh += temp
            d_Whh += np.dot(temp, self.last_hs[t].T)
            d_Wxh += np.dot(temp, self.last_inputs[t].reshape(1, -1))
            d_h = np.dot(self.Whh.T, temp)

        # Clip to prevent exploding gradients
        for d in [d_Wxh, d_Whh, d_Why, d_bh, d_by]:
            np.clip(d, -5, 5, out=d)

        # Update weights and biases
        self.Wxh -= learn_rate * d_Wxh
        self.Whh -= learn_rate * d_Whh
        self.Why -= learn_rate * d_Why
        self.bh -= learn_rate * d_bh
        self.by -= learn_rate * d_by

def preprocess_text(text):
    # Convert to lowercase and remove special characters
    text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    words = text.split()
    return words

def build_vocab(words):
    vocab = defaultdict(lambda: len(vocab))
    vocab['<unk>'] = 0  # Unknown token
    for word in words:
        vocab[word]
    return dict(vocab)

def encode_text(text, vocab):
    return [vocab[word] if word in vocab else vocab['<unk>'] for word in text]

def one_hot_encode(indices, vocab_size):
    return [np.eye(vocab_size)[i] for i in indices]

def train_rnn(rnn, vocab, text, seq_length, epochs=100, learn_rate=1e-3):
    encoded_text = encode_text(text, vocab)
    vocab_size = len(vocab)

    for epoch in range(epochs):
        total_loss = 0
        for i in range(0, len(encoded_text) - seq_length, seq_length):
            inputs = one_hot_encode(encoded_text[i:i+seq_length], vocab_size)
            targets = encoded_text[i+1:i+seq_length+1]

            loss = 0
            h = np.zeros((rnn.hidden_size, 1))

            # Forward pass
            for t in range(seq_length):
                p, h = rnn.forward([inputs[t]])
                loss += -np.log(p[targets[t], 0])

            total_loss += loss

            # Backward pass
            d_y = p.copy()
            d_y[targets[-1]] -= 1
            rnn.backward(d_y, learn_rate)

        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {total_loss / (len(encoded_text) // seq_length)}")

def generate_text(rnn, vocab, seed_text, n_generate=100):
    inv_vocab = {v: k for k, v in vocab.items()}
    encoded_seed = encode_text(seed_text.split(), vocab)
    current_sequence = one_hot_encode(encoded_seed, len(vocab))
    generated_text = seed_text.split()

    h = np.zeros((rnn.hidden_size, 1))
    for _ in range(n_generate):
        p, h = rnn.forward([current_sequence[-1]])
        next_word_idx = np.random.choice(len(vocab), p=p.ravel())
        generated_text.append(inv_vocab[next_word_idx])
        current_sequence = current_sequence[1:] + [np.eye(len(vocab))[next_word_idx]]

    return ' '.join(generated_text)

# Example usage
text = """
The sun rose slowly over the misty hills, painting the sky in hues of pink and gold. Birds chirped their morning songs, welcoming the new day with melodious tunes. In the nearby village, people began to stir, ready to face whatever challenges lay ahead.

Sarah, a young artist, woke early and made her way to her favorite spot by the lake. She set up her easel and began to capture the serene landscape on canvas. The gentle lapping of water against the shore provided a soothing rhythm as she worked.

Meanwhile, in the bustling city miles away, Mark hurried through crowded streets, briefcase in hand. He weaved through the sea of commuters, all racing against time. The aroma of fresh coffee wafted from nearby cafes, tempting many to pause their rush for a moment of indulgence.

In a quiet library, an elderly man named George pored over ancient texts. His wrinkled hands carefully turned fragile pages, each one a window to forgotten worlds. The musty smell of old books filled the air, a scent he had grown to love over decades of study.

As noon approached, children laughed and played in the local park. Their joyful shouts echoed across the green expanse, a stark contrast to the somber atmosphere of the nearby courthouse. There, Laura, a dedicated lawyer, presented her case with passion and conviction, fighting for justice in a world often devoid of it.

As evening fell, families gathered around dinner tables, sharing stories of their day. The clinking of cutlery and warm conversations filled homes with a sense of togetherness. Outside, street lamps flickered to life, casting a soft glow over the quiet neighborhoods.

In a cozy bookshop at the corner of Main Street, Emma arranged a display of new arrivals. She loved introducing readers to new worlds and ideas, each book a portal to adventure or knowledge. The bell above the door chimed as customers wandered in, drawn by the promise of literary treasures.

As night descended, the city transformed. Neon lights buzzed to life, painting the streets in vibrant colors. Music drifted from bars and clubs, where people danced away their worries. In contrast, the countryside lay peaceful under a blanket of stars, crickets providing a gentle nocturnal symphony.

And so the world turned, each moment a story, each person a character in the grand narrative of life. From the highest mountains to the deepest oceans, countless tales unfolded, waiting to be told, heard, and remembered.
"""

# Preprocess the text
words = preprocess_text(text)
vocab = build_vocab(words)
rnn = RNN(len(vocab), hidden_size=100)

# Train the RNN
train_rnn(rnn, vocab, words, seq_length=5, epochs=100, learn_rate=1e-3)

# Generate text
seed_text = "The sun rose"
generated_text = generate_text(rnn, vocab, seed_text, n_generate=20)
print(f"Generated text: {generated_text}")

Epoch 0, Loss: 27.537057032677655
Epoch 10, Loss: 27.479247470914963
Epoch 20, Loss: 27.421969269105535
Epoch 30, Loss: 27.365211191709772
Epoch 40, Loss: 27.30896033607901
Epoch 50, Loss: 27.253199339220355
Epoch 60, Loss: 27.197903394948863
Epoch 70, Loss: 27.143036953069767
Epoch 80, Loss: 27.08854999512541
Epoch 90, Loss: 27.03437381461592
Generated text: The sun rose or racing wrinkled bars on sense character cutlery shore quiet man study moment conversations soft elderly waiting whatever street over


In [25]:
seed_text = "we"
generated_text = generate_text(rnn, vocab, seed_text, n_generate=20)
print(f"Generated text: {generated_text}")

Generated text: we to many courthouse introducing the transformed songs often customers there elderly casting drawn character outside lake made decades birds lapping
