<a href="https://colab.research.google.com/github/vineetdave/LangChainTutorials/blob/main/LSTM_Word_Generation_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Cell 1: Import Libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense  # <-- We've replaced SimpleRNN with LSTM
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical
import warnings
# Silence every warning category (`Warning` is the base class of all of them),
# so anything issued through the `warnings` module -- including deprecation
# notices -- is hidden for the rest of the notebook.
warnings.filterwarnings('ignore', category=Warning)

In [2]:
#-------------------------------------------------------------------------------
# Cell 2: Define Sample Data
# A tiny six-sentence story used as the entire training corpus. It is kept
# inline as a triple-quoted string so the notebook has no external data files.
data = """
Once upon a time in a land far away, there lived a brave knight.
This knight was known for his courage and his kindness.
One day, the king asked the knight to save the princess from a dragon.
The knight accepted the challenge and rode his horse to the dragon's lair.
The brave knight fought the dragon and saved the princess.
They returned to the castle and lived happily ever after.
"""

print("Data loaded.")

Data loaded.


In [3]:
#-------------------------------------------------------------------------------
# Cell 3: Tokenize the Text
# Fit a word-level tokenizer on the corpus, then turn the corpus into one flat
# list of integer word ids.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])

# texts_to_sequences returns one id list per input text; we passed one text.
encoded_text = tokenizer.texts_to_sequences([data])[0]

# One extra slot on top of the number of distinct words in word_index.
vocab_size = 1 + len(tokenizer.word_index)

print(f"Total unique words (vocab size): {vocab_size}")

Total unique words (vocab size): 45


In [4]:
#-------------------------------------------------------------------------------
# Cell 4: Create Input Sequences and Targets
# Slide a fixed-size window over the encoded corpus: each training example is
# (the `seq_length` word ids before position `end`, the word id at `end`).
seq_length = 5  # The model will read 5 words to predict the 6th

sequences = [
    (encoded_text[end - seq_length:end], encoded_text[end])
    for end in range(seq_length, len(encoded_text))
]

print(f"Total number of sequences created: {len(sequences)}")

Total number of sequences created: 66


In [5]:
#-------------------------------------------------------------------------------
# Cell 5: Prepare Data for Keras
# Split the (window, target) pairs into two parallel tuples, then convert them
# into the arrays the model trains on.
input_windows, target_ids = zip(*sequences)

# X: one row of word ids per training example.
X = np.array(input_windows)
# y: one-hot encoded target word, one column per vocabulary slot.
y = to_categorical(target_ids, num_classes=vocab_size)

print(f"Shape of X (inputs): {X.shape}")
print(f"Shape of y (targets): {y.shape}")

Shape of X (inputs): (66, 5)
Shape of y (targets): (66, 45)


In [6]:
#-------------------------------------------------------------------------------
# Cell 6: Define the LSTM Model Architecture
# This is the main change. We are swapping SimpleRNN for LSTM.

# Seed the Python, NumPy and TensorFlow RNGs before any weights are created so
# that weight initialisation and batch shuffling are repeatable after
# "Restart & Run All" (bit-exact results on GPU may still vary).
SEED = 42
tf.keras.utils.set_random_seed(SEED)

embedding_dim = 50
lstm_units = 100  # We'll use 100 memory units, just like before

model = Sequential()

# Declare the input shape explicitly: each example is `seq_length` word ids.
# This replaces the deprecated `input_length` argument of Embedding and lets
# model.summary() below report concrete output shapes and parameter counts.
model.add(tf.keras.Input(shape=(seq_length,)))

# Embedding Layer: maps each word id to a dense `embedding_dim`-sized vector.
model.add(Embedding(input_dim=vocab_size,
                    output_dim=embedding_dim))

# *** THE KEY CHANGE IS HERE ***
# We use an LSTM layer instead of a SimpleRNN layer.
# The LSTM is much better at remembering long-term patterns
# and mitigates the vanishing gradient problem.
model.add(LSTM(lstm_units))
# Note: The default activation for LSTM is 'tanh', which is standard.

# Output Layer: a probability distribution over the vocabulary.
model.add(Dense(vocab_size, activation='softmax'))

model.summary()

In [7]:
#-------------------------------------------------------------------------------
# Cell 7: Compile and Train the Model
# Adam optimizer with categorical cross-entropy, matching the one-hot targets
# in `y`; accuracy is reported per epoch.

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

print("Starting LSTM model training...")
# An LSTM has more parameters than a simple RNN, so it may need more epochs
# than this to reach good results.
model.fit(
    x=X,
    y=y,
    epochs=100,
    batch_size=5,
    verbose=2,  # one summary line per epoch
)
print("Model training complete.")


Starting LSTM model training...
Epoch 1/100
14/14 - 3s - 205ms/step - accuracy: 0.0758 - loss: 3.8054
Epoch 2/100
14/14 - 0s - 8ms/step - accuracy: 0.1515 - loss: 3.7772
Epoch 3/100
14/14 - 0s - 13ms/step - accuracy: 0.1515 - loss: 3.7350
Epoch 4/100
14/14 - 0s - 19ms/step - accuracy: 0.1515 - loss: 3.6325
Epoch 5/100
14/14 - 0s - 9ms/step - accuracy: 0.1515 - loss: 3.4431
Epoch 6/100
14/14 - 0s - 9ms/step - accuracy: 0.1515 - loss: 3.3185
Epoch 7/100
14/14 - 0s - 8ms/step - accuracy: 0.1515 - loss: 3.2601
Epoch 8/100
14/14 - 0s - 8ms/step - accuracy: 0.1667 - loss: 3.1619
Epoch 9/100
14/14 - 0s - 8ms/step - accuracy: 0.1667 - loss: 3.1245
Epoch 10/100
14/14 - 0s - 9ms/step - accuracy: 0.1818 - loss: 2.9773
Epoch 11/100
14/14 - 0s - 8ms/step - accuracy: 0.2121 - loss: 2.8511
Epoch 12/100
14/14 - 0s - 9ms/step - accuracy: 0.2273 - loss: 2.7324
Epoch 13/100
14/14 - 0s - 8ms/step - accuracy: 0.2424 - loss: 2.6146
Epoch 14/100
14/14 - 0s - 9ms/step - accuracy: 0.3030 - loss: 2.4872
Epoch 1

In [8]:
#-------------------------------------------------------------------------------
# Cell 8: Define the Text Generation Function
# Greedy word-by-word generation using the notebook-level `tokenizer`, `model`
# and `seq_length`.

def generate_text(seed_text, n_words):
    """Extend `seed_text` with `n_words` words predicted by the model.

    On every step the text produced so far is re-tokenized, cut/padded to the
    last `seq_length` word ids, and fed to `model`; the highest-probability
    word is appended.

    Args:
        seed_text: Starting text as a single string.
        n_words: Number of words to append.

    Returns:
        The seed text followed by the generated words, separated by single
        spaces. A predicted index with no entry in `tokenizer.word_index`
        is rendered as '?'.
    """
    # Reverse lookup: word id -> word.
    index_to_word = dict((idx, word) for word, idx in tokenizer.word_index.items())

    pieces = [seed_text]
    for _ in range(n_words):
        context = " ".join(pieces)
        token_ids = tokenizer.texts_to_sequences([context])[0]
        # Keep only the most recent `seq_length` ids (older ones are dropped).
        window = pad_sequences([token_ids], maxlen=seq_length, truncating='pre')

        probabilities = model.predict(window, verbose=0)[0]
        best_index = np.argmax(probabilities)

        pieces.append(index_to_word.get(best_index, '?'))

    return " ".join(pieces)

print("Text generation function defined.")

Text generation function defined.


In [9]:
#-------------------------------------------------------------------------------
# Cell 9: Generate New Text
# Try the trained LSTM on a short prompt.
# NOTE: "went" does not occur in the training text; since the tokenizer was
# created without an OOV token, that word is presumably skipped when the seed
# is encoded, so the first prediction sees one word fewer than `seq_length`.

seed_text = "the knight went to the"
generated = generate_text(seed_text, n_words=20)  # append 20 predicted words

print("\n--- SEED TEXT ---")
print(seed_text)
print("\n--- GENERATED TEXT (from LSTM) ---")
print(generated)


--- SEED TEXT ---
the knight went to the

--- GENERATED TEXT (from LSTM) ---
the knight went to the princess princess from brave the knight knight was and and rode rode his the the dragon's dragon's lair the knight
