In [6]:
#training a RNN model from scratch 
#1 create a encoding 
#one hot encoding
def one_hot_encoding(input):
    a={c:[1 if i==idx else 0 for i in range(len(input))]
       for idx,c in enumerate(input)}
    return a
one_hot_encoding("HELLO")

{'H': [1, 0, 0, 0, 0],
 'E': [0, 1, 0, 0, 0],
 'L': [0, 0, 0, 1, 0],
 'O': [0, 0, 0, 0, 1]}

In [13]:
import numpy as np

USING TENSORFLOW (INBUILT MODELS)

In [78]:
import tensorflow as tf
import numpy as np

# One-hot encoding
def one_hot_encoding(input_sequence):
    unique_chars = sorted(set(input_sequence))
    char_to_idx = {char: idx for idx, char in enumerate(unique_chars)}
    idx_to_char = {idx: char for char, idx in char_to_idx.items()}
    return char_to_idx, idx_to_char

# Prepare sequences
def prepare_sequences(sequence, char_to_idx):
    inputs = sequence[:-1]
    targets = sequence[1:]
    input_indices = [char_to_idx[char] for char in inputs]
    target_indices = [char_to_idx[char] for char in targets]
    return input_indices, target_indices

# Build the RNN model using TensorFlow's pre-built SimpleRNN
def build_rnn_model(vocab_size, embedding_dim, hidden_units):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim),
        tf.keras.layers.SimpleRNN(hidden_units, return_sequences=True, return_state=False),
        tf.keras.layers.Dense(vocab_size, activation='softmax')
    ])
    return model

# Generate text
def generate_text(model, start_char, char_to_idx, idx_to_char, length=20):
    input_idx = np.array([char_to_idx[start_char]])
    result = start_char

    for _ in range(length):
        predictions = model.predict(input_idx.reshape(1, -1), verbose=0)
        predicted_idx = np.argmax(predictions[0, -1])
        result += idx_to_char[predicted_idx]
        input_idx = np.array([predicted_idx])

    return result

# Example usage
if __name__ == "__main__":
    input_sequence = "hello world"
    char_to_idx, idx_to_char = one_hot_encoding(input_sequence)
    inputs, targets = prepare_sequences(input_sequence, char_to_idx)
    
    # Parameters
    vocab_size = len(char_to_idx)
    embedding_dim = 16
    hidden_units = 50
    epochs = 100
    learning_rate = 0.001

    # Convert inputs/targets to tensors
    inputs = np.array(inputs)
    targets = np.array(targets)

    # Reshape inputs to match the expected shape for RNN: (batch_size, timesteps, input_dim)
    inputs = inputs.reshape((inputs.shape[0], 1))  # Adding a 'timesteps' axis
    targets = targets.reshape((targets.shape[0], 1))  # Adding a 'timesteps' axis

    # Create dataset and batch it
    dataset = tf.data.Dataset.from_tensor_slices((inputs, targets))
    dataset = dataset.batch(1, drop_remainder=True).shuffle(100)  # Batch size of 1 and shuffle

    # Build and compile the model
    model = build_rnn_model(vocab_size, embedding_dim, hidden_units)
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy')

    # Train the model
    for epoch in range(epochs):
        for batch_inputs, batch_targets in dataset:
            history = model.fit(batch_inputs, batch_targets, verbose=0)
        print(f"Epoch {epoch + 1}/{epochs}, Loss: {history.history['loss'][0]}")

    # Generate text
    print(generate_text(model, "h", char_to_idx, idx_to_char, length=20))


Epoch 1/100, Loss: 2.099057912826538
Epoch 2/100, Loss: 1.9969029426574707
Epoch 3/100, Loss: 1.9822297096252441
Epoch 4/100, Loss: 2.064483404159546
Epoch 5/100, Loss: 2.036527156829834
Epoch 6/100, Loss: 2.0194830894470215
Epoch 7/100, Loss: 1.8088974952697754
Epoch 8/100, Loss: 2.0059268474578857
Epoch 9/100, Loss: 1.9244847297668457
Epoch 10/100, Loss: 1.814267873764038
Epoch 11/100, Loss: 2.022545337677002
Epoch 12/100, Loss: 1.8352160453796387
Epoch 13/100, Loss: 1.6465314626693726
Epoch 14/100, Loss: 1.7519997358322144
Epoch 15/100, Loss: 1.5370376110076904
Epoch 16/100, Loss: 1.6986160278320312
Epoch 17/100, Loss: 1.6926426887512207
Epoch 18/100, Loss: 1.6260029077529907
Epoch 19/100, Loss: 1.2688549757003784
Epoch 20/100, Loss: 0.6693180799484253
Epoch 21/100, Loss: 1.196740746498108
Epoch 22/100, Loss: 1.2172878980636597
Epoch 23/100, Loss: 1.2028616666793823
Epoch 24/100, Loss: 1.2921171188354492
Epoch 25/100, Loss: 0.9955384135246277
Epoch 26/100, Loss: 1.158467411994934
Ep

Coding from scratch

In [80]:
import numpy as np

# Activation functions
def relu(x):
    return np.maximum(0, x)

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def tanh(x):
    return np.tanh(x)

def softmax(x):
    exp_x = np.exp(x - np.max(x))  # For numerical stability
    return exp_x / exp_x.sum(axis=0)

# One-hot encoding
def one_hot_encoding(input_sequence):
    unique_chars = sorted(set(input_sequence))
    char_to_idx = {char: idx for idx, char in enumerate(unique_chars)}
    idx_to_char = {idx: char for char, idx in char_to_idx.items()}
    
    one_hot_dict = {
        char: [1 if i == char_to_idx[char] else 0 for i in range(len(unique_chars))]
        for char in unique_chars
    }
    return char_to_idx, idx_to_char, one_hot_dict

# Prepare sequences
def prepare_sequences(sequence, char_to_idx):
    inputs = sequence[:-1]
    targets = sequence[1:]
    return inputs, [char_to_idx[char] for char in targets]

# RNN Class
class RNN:
    def __init__(self, input_size, hidden_size, output_size, learning_rate=0.01):
        self.hidden_size = hidden_size
        self.learning_rate = learning_rate
        
        # Weight matrices with Xavier Initialization
        self.wih = np.random.randn(hidden_size, input_size) * np.sqrt(2. / input_size)
        self.whh = np.random.randn(hidden_size, hidden_size) * np.sqrt(2. / hidden_size)
        self.who = np.random.randn(output_size, hidden_size) * np.sqrt(2. / hidden_size)
        
        # Biases
        self.bh = np.zeros((hidden_size, 1))  # Hidden bias
        self.bo = np.zeros((output_size, 1))  # Output bias

    def forward(self, x, h_prev):
        h_next = tanh(np.dot(self.wih, x) + np.dot(self.whh, h_prev) + self.bh)
        y = np.dot(self.who, h_next) + self.bo
        y = softmax(y)
        return y, h_next

    def train(self, inputs, targets, one_hot_dict, epochs=100):
        vocab_size = len(one_hot_dict)
        loss_history = []
        
        for epoch in range(epochs):
            h_prev = np.zeros((self.hidden_size, 1))  # Reset hidden state
            total_loss = 0
            dwih, dwhh, dwho = np.zeros_like(self.wih), np.zeros_like(self.whh), np.zeros_like(self.who)
            dbh, dbo = np.zeros_like(self.bh), np.zeros_like(self.bo)
            xs, hs, ys, ps = {}, {}, {}, {}
            hs[-1] = np.copy(h_prev)
            
            # Forward pass
            for t in range(len(inputs)):
                xs[t] = np.array(one_hot_dict[inputs[t]]).reshape(-1, 1)  # One-hot
                ys[t] = np.zeros((vocab_size, 1))
                ys[t][targets[t]] = 1  # True label
                
                ps[t], hs[t] = self.forward(xs[t], hs[t-1])
                total_loss += -np.log(ps[t][targets[t], 0])  # Cross-entropy loss
            
            # Backward pass
            dh_next = np.zeros_like(hs[0])
            for t in reversed(range(len(inputs))):
                dy = ps[t] - ys[t]  # Gradient of output
                dwho += np.dot(dy, hs[t].T)
                dbo += dy
                
                dh = np.dot(self.who.T, dy) + dh_next  # Backprop through hidden
                dh_raw = dh * (1 - hs[t]**2)  # tanh derivative
                dwih += np.dot(dh_raw, xs[t].T)
                dwhh += np.dot(dh_raw, hs[t-1].T)
                dbh += dh_raw
                
                dh_next = np.dot(self.whh.T, dh_raw)
          
            # Gradient clipping
            for dparam in [dwih, dwhh, dwho, dbh, dbo]:
                np.clip(dparam, -1, 1, out=dparam)
         
            # Update parameters
            self.wih -= self.learning_rate * dwih
            self.whh -= self.learning_rate * dwhh
            self.who -= self.learning_rate * dwho
            self.bh -= self.learning_rate * dbh
            self.bo -= self.learning_rate * dbo

            loss_history.append(total_loss / len(inputs))
            print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(inputs)}")
        
        return loss_history

# Text Generation
def generate_text(rnn, start_char, char_to_idx, idx_to_char, one_hot_dict, length=10):
    h_prev = np.zeros((rnn.hidden_size, 1))
    x = np.array(one_hot_dict[start_char]).reshape(-1, 1)
    result = start_char

    for _ in range(length):
        y, h_prev = rnn.forward(x, h_prev)
        idx = np.argmax(y)
        result += idx_to_char[idx]
        x = np.array(one_hot_dict[idx_to_char[idx]]).reshape(-1, 1)
    
    return result

# Example usage
if __name__ == "__main__":
    input_sequence = "hello world"
    char_to_idx, idx_to_char, one_hot_dict = one_hot_encoding(input_sequence)
    inputs, targets = prepare_sequences(input_sequence, char_to_idx)
    rnn = RNN(input_size=len(one_hot_dict), hidden_size=50, output_size=len(one_hot_dict), learning_rate=0.001)
    loss_history = rnn.train(inputs, targets, one_hot_dict, epochs=100)
    print(generate_text(rnn, "h", char_to_idx, idx_to_char, one_hot_dict, length=10))


Epoch 1/100, Loss: 2.5316583343958676
Epoch 2/100, Loss: 2.464115654074246
Epoch 3/100, Loss: 2.3968181145447116
Epoch 4/100, Loss: 2.329754072605854
Epoch 5/100, Loss: 2.2629374661225383
Epoch 6/100, Loss: 2.1964793261498614
Epoch 7/100, Loss: 2.1306520311081742
Epoch 8/100, Loss: 2.065925667295679
Epoch 9/100, Loss: 2.0029440539907593
Epoch 10/100, Loss: 1.9423643461724809
Epoch 11/100, Loss: 1.8847049381374004
Epoch 12/100, Loss: 1.8302838070288139
Epoch 13/100, Loss: 1.7791072793611726
Epoch 14/100, Loss: 1.7310137637275322
Epoch 15/100, Loss: 1.685711713876372
Epoch 16/100, Loss: 1.642909151028769
Epoch 17/100, Loss: 1.6023491364546836
Epoch 18/100, Loss: 1.563818492703086
Epoch 19/100, Loss: 1.5271488808703897
Epoch 20/100, Loss: 1.4922145622176797
Epoch 21/100, Loss: 1.4588863107458878
Epoch 22/100, Loss: 1.4270411706003165
Epoch 23/100, Loss: 1.396568938954962
Epoch 24/100, Loss: 1.367370590839457
Epoch 25/100, Loss: 1.3393532389430856
Epoch 26/100, Loss: 1.3124337351008912
Epo

In [83]:
if __name__ == "__main__":
    input_sequence = "world"
    char_to_idx, idx_to_char, one_hot_dict = one_hot_encoding(input_sequence)
    inputs, targets = prepare_sequences(input_sequence, char_to_idx)
    rnn = RNN(input_size=len(one_hot_dict), hidden_size=50, output_size=len(one_hot_dict), learning_rate=0.001)
    loss_history = rnn.train(inputs, targets, one_hot_dict, epochs=100)
    print(generate_text(rnn, "w", char_to_idx, idx_to_char, one_hot_dict, length=5))

Epoch 1/100, Loss: 2.1272165954323112
Epoch 2/100, Loss: 2.0885484291638075
Epoch 3/100, Loss: 2.0506197002498396
Epoch 4/100, Loss: 2.0134290606998166
Epoch 5/100, Loss: 1.976972717075563
Epoch 6/100, Loss: 1.9412456915381562
Epoch 7/100, Loss: 1.9062424345022408
Epoch 8/100, Loss: 1.8719538961211246
Epoch 9/100, Loss: 1.838368943432236
Epoch 10/100, Loss: 1.8054772150883414
Epoch 11/100, Loss: 1.7732654921031372
Epoch 12/100, Loss: 1.7417197709080803
Epoch 13/100, Loss: 1.7108255419879541
Epoch 14/100, Loss: 1.680571502248358
Epoch 15/100, Loss: 1.6509462538487027
Epoch 16/100, Loss: 1.6219379840399695
Epoch 17/100, Loss: 1.5935326515814212
Epoch 18/100, Loss: 1.5657175998712753
Epoch 19/100, Loss: 1.5384792472459052
Epoch 20/100, Loss: 1.5118030671733569
Epoch 21/100, Loss: 1.485674965598621
Epoch 22/100, Loss: 1.4600821426209145
Epoch 23/100, Loss: 1.4350125224911392
Epoch 24/100, Loss: 1.4104554861532976
Epoch 25/100, Loss: 1.3863992278366537
Epoch 26/100, Loss: 1.3628338845549097

In [85]:
if __name__ == "__main__":
    input_sequence = "apple"
    char_to_idx, idx_to_char, one_hot_dict = one_hot_encoding(input_sequence)
    inputs, targets = prepare_sequences(input_sequence, char_to_idx)
    rnn = RNN(input_size=len(one_hot_dict), hidden_size=50, output_size=len(one_hot_dict), learning_rate=0.001)
    loss_history = rnn.train(inputs, targets, one_hot_dict, epochs=100)
    print(generate_text(rnn, "a", char_to_idx, idx_to_char, one_hot_dict, length=5))

Epoch 1/100, Loss: 2.155918115140609
Epoch 2/100, Loss: 2.1083164614807512
Epoch 3/100, Loss: 2.0615253038967634
Epoch 4/100, Loss: 2.0155844447840248
Epoch 5/100, Loss: 1.9705295873959177
Epoch 6/100, Loss: 1.9263850711016877
Epoch 7/100, Loss: 1.8831666320906024
Epoch 8/100, Loss: 1.8408844522124166
Epoch 9/100, Loss: 1.7995391085285588
Epoch 10/100, Loss: 1.75912871638921
Epoch 11/100, Loss: 1.719647615191402
Epoch 12/100, Loss: 1.681079772265026
Epoch 13/100, Loss: 1.6434056594637552
Epoch 14/100, Loss: 1.6066048479796786
Epoch 15/100, Loss: 1.570655647101998
Epoch 16/100, Loss: 1.5355365307074407
Epoch 17/100, Loss: 1.5012266319292835
Epoch 18/100, Loss: 1.4677060670712216
Epoch 19/100, Loss: 1.4349605029898675
Epoch 20/100, Loss: 1.402974186283065
Epoch 21/100, Loss: 1.3717366099083659
Epoch 22/100, Loss: 1.3412375885128103
Epoch 23/100, Loss: 1.3114650298418438
Epoch 24/100, Loss: 1.2824112062442234
Epoch 25/100, Loss: 1.2540646590414553
Epoch 26/100, Loss: 1.2264126873398653
Ep

In [87]:
if __name__ == "__main__":
    input_sequence = "indigo"
    char_to_idx, idx_to_char, one_hot_dict = one_hot_encoding(input_sequence)
    inputs, targets = prepare_sequences(input_sequence, char_to_idx)
    rnn = RNN(input_size=len(one_hot_dict), hidden_size=50, output_size=len(one_hot_dict), learning_rate=0.001)
    loss_history = rnn.train(inputs, targets, one_hot_dict, epochs=100)
    print(generate_text(rnn, "i", char_to_idx, idx_to_char, one_hot_dict, length=5))

Epoch 1/100, Loss: 2.088831211144503
Epoch 2/100, Loss: 2.0487816029114994
Epoch 3/100, Loss: 2.009812342942497
Epoch 4/100, Loss: 1.9718749207335076
Epoch 5/100, Loss: 1.934918533780427
Epoch 6/100, Loss: 1.898897059314382
Epoch 7/100, Loss: 1.863759827459249
Epoch 8/100, Loss: 1.8294553291868838
Epoch 9/100, Loss: 1.7959371145798149
Epoch 10/100, Loss: 1.7631653677037515
Epoch 11/100, Loss: 1.7311005262602486
Epoch 12/100, Loss: 1.6997028209052583
Epoch 13/100, Loss: 1.6689365773431253
Epoch 14/100, Loss: 1.6387719904035811
Epoch 15/100, Loss: 1.609182003168873
Epoch 16/100, Loss: 1.5801421308059596
Epoch 17/100, Loss: 1.5516313225200626
Epoch 18/100, Loss: 1.5236341276565772
Epoch 19/100, Loss: 1.4961367794835714
Epoch 20/100, Loss: 1.469126544810782
Epoch 21/100, Loss: 1.4425927321840952
Epoch 22/100, Loss: 1.4165275326574558
Epoch 23/100, Loss: 1.390924701216705
Epoch 24/100, Loss: 1.3657816050033738
Epoch 25/100, Loss: 1.3410965533290913
Epoch 26/100, Loss: 1.3168662707949599
Epo