In [3]:
# 📘 Simple LSTM Text Generation Model in Jupyter
# Make sure to install dependencies if running for the first time
# !pip install tensorflow

import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# 🔤 Training corpus (extend it with more text on any topic to improve results)
data = """Technology is changing the way we live. Artificial Intelligence and Machine Learning are revolutionizing industries. 
Nature is a beautiful part of Earth. Forests, rivers, and wildlife help maintain balance. 
Education empowers individuals and helps build societies."""

# 🧼 Build the vocabulary from the whole corpus.
tokenizer = Tokenizer()
tokenizer.fit_on_texts([data])
word_index = tokenizer.word_index
# One extra slot on top of the vocabulary: it is the size of both the
# embedding input and the softmax output used further down.
total_words = 1 + len(word_index)

# Turn every sentence into its growing prefixes of length 2..len(sentence):
# each prefix is one training sample whose last token is the word to predict.
input_sequences = []
for sentence in data.split('.'):
    encoded = tokenizer.texts_to_sequences([sentence])[0]
    input_sequences.extend(encoded[:end] for end in range(2, len(encoded) + 1))

# Left-pad all prefixes to the length of the longest one so they stack into
# a single 2-D array with the target word always in the last column.
max_seq_len = max(map(len, input_sequences))
input_sequences = np.array(
    pad_sequences(input_sequences, maxlen=max_seq_len, padding='pre')
)

# Last column is the label, everything before it is the model input;
# labels are one-hot encoded over the vocabulary.
xs, labels = input_sequences[:, :-1], input_sequences[:, -1]
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

# 🏗 Next-word model: embedding (10 dims) -> LSTM (64 units) -> softmax over
# the vocabulary. Layers are listed in the order the data flows through them.
model = Sequential([
    Embedding(total_words, 10),
    LSTM(64),
    Dense(total_words, activation='softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',  # matches the one-hot encoded targets in ys
    metrics=['accuracy'],
)

# 📈 Train for 500 epochs without per-epoch progress output.
model.fit(xs, ys, epochs=500, verbose=0)

# ✍️ Function to generate text
def generate_text(seed_text, next_words=10):
    """Extend ``seed_text`` by greedily predicting one word at a time.

    On every step the current text is tokenized with the module-level
    ``tokenizer``, left-padded to ``max_seq_len - 1`` tokens (the input
    width the model was trained on), and fed to ``model``; the word with
    the highest predicted probability is appended to the text.

    Args:
        seed_text: Prompt to continue.
        next_words: Maximum number of words to append.

    Returns:
        ``seed_text`` followed by the generated words, separated by single
        spaces. Fewer than ``next_words`` words are appended if the model
        predicts an id that has no word (e.g. the padding id 0).
    """
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences(
            [token_list], maxlen=max_seq_len - 1, padding='pre'
        )
        probabilities = model.predict(token_list, verbose=0)
        # argmax over a batch of one yields a length-1 array; take the scalar
        # so the lookup below works on a plain int rather than an ndarray.
        predicted = int(np.argmax(probabilities, axis=-1)[0])

        # index_word is the tokenizer's reverse (id -> word) mapping, so no
        # scan over word_index is needed.
        word = tokenizer.index_word.get(predicted)
        if word is None:
            # Nothing to append: the seed would stay unchanged and every
            # remaining step would repeat this same prediction, so stop.
            break
        seed_text += " " + word
    return seed_text

# 🎯 Example prompts: (header line, seed text) pairs, printed in this order.
examples = [
    ("🧠 Generated text on prompt 'Technology is':", "Technology is"),
    ("\n🌿 Generated text on prompt 'Nature is':", "Nature is"),
    ("\n📚 Generated text on prompt 'Education helps':", "Education helps"),
]
for header, prompt in examples:
    print(header)
    print(generate_text(prompt))


🧠 Generated text on prompt 'Technology is':
Technology is changing the way we live industries industries societies societies societies

🌿 Generated text on prompt 'Nature is':
Nature is a beautiful part of earth revolutionizing industries revolutionizing revolutionizing industries

📚 Generated text on prompt 'Education helps':
Education helps individuals and helps build societies balance balance industries industries societies
